summaryrefslogtreecommitdiff
path: root/libclc/generic
diff options
context:
space:
mode:
authorJan Vesely <jan.vesely@rutgers.edu>2018-02-06 18:44:45 +0000
committerJan Vesely <jan.vesely@rutgers.edu>2018-02-06 18:44:45 +0000
commite276f35e65ea804df07a81e9cd4f9212d9c591e1 (patch)
treed8fabb8b57ee0df7339902bb9f5ead3fd8fc978b /libclc/generic
parent126b643e89a7ded36983d3ef0e91d64668467e7b (diff)
Add vstore_half_rtn implementation
Passes CTS on Carrizo.
Reviewer: Jeroen Ketema <j.ketema@xs4all.nl>
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Diffstat (limited to 'libclc/generic')
-rw-r--r--libclc/generic/include/clc/shared/vstore.h2
-rw-r--r--libclc/generic/lib/shared/vstore.cl42
2 files changed, 43 insertions, 1 deletions
diff --git a/libclc/generic/include/clc/shared/vstore.h b/libclc/generic/include/clc/shared/vstore.h
index 549e6bfea64..8639be707e5 100644
--- a/libclc/generic/include/clc/shared/vstore.h
+++ b/libclc/generic/include/clc/shared/vstore.h
@@ -38,11 +38,13 @@ _CLC_VECTOR_VSTORE_PRIM1(float)
_CLC_VECTOR_VSTORE_HALF_PRIM1(float,)
_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtz)
+_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtn)
#ifdef cl_khr_fp64
_CLC_VECTOR_VSTORE_PRIM1(double)
_CLC_VECTOR_VSTORE_HALF_PRIM1(double,)
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtz)
+ _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtn)
#endif
#ifdef cl_khr_fp16
diff --git a/libclc/generic/lib/shared/vstore.cl b/libclc/generic/lib/shared/vstore.cl
index cbddd59795b..243c2f2810c 100644
--- a/libclc/generic/lib/shared/vstore.cl
+++ b/libclc/generic/lib/shared/vstore.cl
@@ -124,6 +124,26 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rtz(float x)
return x;
return as_float(as_uint(x) & mask);
}
+_CLC_DEF _CLC_OVERLOAD float __clc_rti(float x)
+{ /* Round x away from zero to a value whose masked low bits are all clear; NaN is returned unchanged. */
+ const float inf = copysign(INFINITY, x); /* infinity carrying the sign of x: the direction to round in */
+ /* Mask covering the low 13 mantissa bits (presumably the bits a half cannot hold: 23 - 10) */
+ int mask = (1 << 13) - 1;
+ const int exp = (as_uint(x) >> 23 & 0xff) - 127; /* exponent field of x with the bias of 127 removed */
+ /* Below exponent -14 (presumably the half denormal range) values cannot be flushed; the mask grows by one bit per exponent step, capped at 10 extra bits */
+ if (exp < -14)
+ mask = (1 << (13 + min(-(exp + 14), 10))) - 1;
+ /* NaN corner case: hand it back unchanged */
+ if (isnan(x))
+ return x;
+ const float next = nextafter(as_float(as_uint(x) | mask), inf); /* set every masked bit, then step one ULP toward inf */
+ return ((as_uint(x) & mask) == 0) ? x : next; /* x is kept as-is when its masked bits are already zero */
+}
+_CLC_DEF _CLC_OVERLOAD float __clc_rtn(float x)
+{ /* Dispatch on the sign bit of x: clear -> __clc_rtz, set (including -0.0f) -> __clc_rti; presumably this yields round-toward-negative-infinity, per the _rtn suffix. */
+ return ((as_uint(x) & 0x80000000) == 0) ? __clc_rtz(x) : __clc_rti(x);
+}
+
#ifdef cl_khr_fp64
_CLC_DEF _CLC_OVERLOAD double __clc_noop(double x)
{
@@ -145,11 +165,31 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rtz(double x)
return x;
return as_double(as_ulong(x) & mask);
}
+_CLC_DEF _CLC_OVERLOAD double __clc_rti(double x)
+{ /* Round x away from zero to a value whose masked low bits are all clear; NaN is returned unchanged. */
+ const double inf = copysign((double)INFINITY, x); /* infinity carrying the sign of x: the direction to round in */
+ /* Mask covering the low 42 mantissa bits (presumably the bits a half cannot hold: 52 - 10) */
+ long mask = (1UL << 42UL) - 1UL;
+ const int exp = (as_ulong(x) >> 52 & 0x7ff) - 1023; /* exponent field of x with the bias of 1023 removed */
+ /* Below exponent -14 (presumably the half denormal range) values cannot be flushed; the mask grows by one bit per exponent step, capped at 10 extra bits */
+ if (exp < -14)
+ mask = (1UL << (42UL + min(-(exp + 14), 10))) - 1;
+ /* NaN corner case: hand it back unchanged */
+ if (isnan(x))
+ return x;
+ const double next = nextafter(as_double(as_ulong(x) | mask), inf); /* set every masked bit, then step one ULP toward inf */
+ return ((as_ulong(x) & mask) == 0) ? x : next; /* x is kept as-is when its masked bits are already zero */
+}
+_CLC_DEF _CLC_OVERLOAD double __clc_rtn(double x)
+{ /* Dispatch on the sign bit of x: clear -> __clc_rtz, set (including -0.0) -> __clc_rti; presumably this yields round-toward-negative-infinity, per the _rtn suffix. */
+ return ((as_ulong(x) & 0x8000000000000000UL) == 0) ? __clc_rtz(x) : __clc_rti(x);
+}
#endif
#define __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
__FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop) \
- __FUNC(SUFFIX ## _rtz, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtz)
+ __FUNC(SUFFIX ## _rtz, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtz) \
+ __FUNC(SUFFIX ## _rtn, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtn)
#define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
__XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS)