about | summary | refs | log | tree | commit | diff
path: root/gcc/config/i386/xmmintrin.h
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/xmmintrin.h')
-rw-r--r-- gcc/config/i386/xmmintrin.h 115
1 files changed, 108 insertions, 7 deletions
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 4136e901795..43a05c1a6ee 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -475,6 +475,16 @@ _mm_cvtss_si32 (__m128 __A)
return __builtin_ia32_cvtss2si ((__v4sf) __A);
}
+#ifdef __x86_64__
+/* Convert the lower SPFP value to a 64-bit integer according to the current
+ rounding mode. */
+static __inline long long
+_mm_cvtss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+#endif
+
/* Convert the two lower SPFP values to 32-bit integers according to the
current rounding mode. Return the integers in packed form. */
static __inline __m64
@@ -490,6 +500,15 @@ _mm_cvttss_si32 (__m128 __A)
return __builtin_ia32_cvttss2si ((__v4sf) __A);
}
+#ifdef __x86_64__
+/* Truncate the lower SPFP value to a 64-bit integer. */
+static __inline long long
+_mm_cvttss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+}
+#endif
+
/* Truncate the two lower SPFP values to 32-bit integers. Return the
integers in packed form. */
static __inline __m64
@@ -505,6 +524,15 @@ _mm_cvtsi32_ss (__m128 __A, int __B)
return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
}
+#ifdef __x86_64__
+/* Convert B to a SPFP value and insert it as element zero in A. */
+static __inline __m128
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+#endif
+
/* Convert the two 32-bit values in B to SPFP form and insert them
as the two lower elements in A. */
static __inline __m128
@@ -1586,13 +1614,13 @@ _mm_ucomineq_sd (__m128d __A, __m128d __B)
static __inline __m128i
_mm_load_si128 (__m128i const *__P)
{
- return (__m128i) __builtin_ia32_loaddqa (__P);
+ return (__m128i) __builtin_ia32_loaddqa ((char const *)__P);
}
static __inline __m128i
_mm_loadu_si128 (__m128i const *__P)
{
- return (__m128i) __builtin_ia32_loaddqu (__P);
+ return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
}
static __inline __m128i
@@ -1604,13 +1632,13 @@ _mm_loadl_epi64 (__m128i const *__P)
static __inline void
_mm_store_si128 (__m128i *__P, __m128i __B)
{
- __builtin_ia32_storedqa (__P, (__v16qi)__B);
+ __builtin_ia32_storedqa ((char *)__P, (__v16qi)__B);
}
static __inline void
_mm_storeu_si128 (__m128i *__P, __m128i __B)
{
- __builtin_ia32_storedqu (__P, (__v16qi)__B);
+ __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
}
static __inline void
@@ -1619,6 +1647,12 @@ _mm_storel_epi64 (__m128i *__P, __m128i __B)
*(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B);
}
+static __inline __m64
+_mm_movepi64_pi64 (__m128i __B)
+{
+ return (__m64) __builtin_ia32_movdq2q ((__v2di)__B);
+}
+
static __inline __m128i
_mm_move_epi64 (__m128i __A)
{
@@ -1656,6 +1690,24 @@ _mm_set_epi32 (int __Z, int __Y, int __X, int __W)
return __u.__v;
}
+
+#ifdef __x86_64__
+/* Create the vector [Z Y]: Y is stored as element 0, Z as element 1. */
+static __inline __m128i
+_mm_set_epi64x (long long __Z, long long __Y)
+{
+ union {
+ long long __a[2];
+ __m128i __v;
+ } __u;
+
+ __u.__a[0] = __Y;
+ __u.__a[1] = __Z;
+
+ return __u.__v;
+}
+#endif
+
/* Create the vector [S T U V Z Y X W]. */
static __inline __m128i
_mm_set_epi16 (short __Z, short __Y, short __X, short __W,
@@ -1724,6 +1776,15 @@ _mm_set1_epi32 (int __A)
return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
}
+#ifdef __x86_64__
+static __inline __m128i
+_mm_set1_epi64x (long long __A)
+{
+ __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+ return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0));
+}
+#endif
+
static __inline __m128i
_mm_set1_epi16 (short __A)
{
@@ -1893,12 +1954,28 @@ _mm_cvtsd_si32 (__m128d __A)
return __builtin_ia32_cvtsd2si ((__v2df) __A);
}
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+#endif
+
static __inline int
_mm_cvttsd_si32 (__m128d __A)
{
return __builtin_ia32_cvttsd2si ((__v2df) __A);
}
+#ifdef __x86_64__
+static __inline long long
+_mm_cvttsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+#endif
+
static __inline __m128
_mm_cvtsd_ss (__m128 __A, __m128d __B)
{
@@ -1911,6 +1988,14 @@ _mm_cvtsi32_sd (__m128d __A, int __B)
return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
}
+#ifdef __x86_64__
+static __inline __m128d
+_mm_cvtsi64x_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+#endif
+
static __inline __m128d
_mm_cvtss_sd (__m128d __A, __m128 __B)
{
@@ -2048,7 +2133,7 @@ _mm_add_epi32 (__m128i __A, __m128i __B)
static __inline __m128i
_mm_add_epi64 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_paddq128 ((__v4si)__A, (__v4si)__B);
+ return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
}
static __inline __m128i
@@ -2096,7 +2181,7 @@ _mm_sub_epi32 (__m128i __A, __m128i __B)
static __inline __m128i
_mm_sub_epi64 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_psubq128 ((__v4si)__A, (__v4si)__B);
+ return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
}
static __inline __m128i
@@ -2142,7 +2227,7 @@ _mm_mullo_epi16 (__m128i __A, __m128i __B)
}
static __inline __m64
-_mm_mul_pu16 (__m64 __A, __m64 __B)
+_mm_mul_su32 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
}
@@ -2459,6 +2544,14 @@ _mm_cvtsi32_si128 (int __A)
return (__m128i) __builtin_ia32_loadd (&__A);
}
+#ifdef __x86_64__
+static __inline __m128i
+_mm_cvtsi64x_si128 (long long __A)
+{
+ return (__m128i) __builtin_ia32_movq2dq (__A);
+}
+#endif
+
static __inline int
_mm_cvtsi128_si32 (__m128i __A)
{
@@ -2467,6 +2560,14 @@ _mm_cvtsi128_si32 (__m128i __A)
return __tmp;
}
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsi128_si64x (__m128i __A)
+{
+ return __builtin_ia32_movdq2q ((__v2di)__A);
+}
+#endif
+
#endif /* __SSE2__ */
#endif /* __SSE__ */