From 508b6491b90356efc63f9c53e3cb6fae7412e205 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 26 Sep 2012 13:10:26 +0000 Subject: 2012-09-26 Ulrich Drepper Optimize bulk mode for normal_distribution for SSE3. * configure.host: Define cpu_opt_bits_random. * configure.ac: Substitute CPU_OPT_BITS_RANDOM. * include/Makefile.am (bits_headers): Add ${bits_host_headers}. (bits_host_headers): Define. * include/bits/random.tcc: Move __detail::_Power_of_2 to... * include/bits/random.h: ...here. * include/std/random: Include <bits/opt_random.h>. * config/cpu/i486/opt/bits/opt_random.h: New file. * config/cpu/generic/opt/bits/opt_random.h: New file. * configure: Regenerated. * Makefile.in: Regenerated. * aclocal.m4: Regenerated. * doc/Makefile.in: Regenerated. * include/Makefile.in: Regenerated. * libsupc++/Makefile.in: Regenerated. * po/Makefile.in: Regenerated. * python/Makefile.in: Regenerated. * src/Makefile.in: Regenerated. * src/c++11/Makefile.in: Regenerated. * src/c++98/Makefile.in: Regenerated. * testsuite/Makefile.in: Regenerated. * config/cpu/generic/opt/ext/opt_random.h: Fix comment. git-svn-id: https://gcc.gnu.org/svn/gcc/trunk@191758 138bc75d-0d04-0410-961f-82ee72b054a4 --- .../config/cpu/generic/opt/bits/opt_random.h | 38 ++++ .../config/cpu/generic/opt/ext/opt_random.h | 2 +- libstdc++-v3/config/cpu/i486/opt/bits/opt_random.h | 219 +++++++++++++++++++++ 3 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 libstdc++-v3/config/cpu/generic/opt/bits/opt_random.h create mode 100644 libstdc++-v3/config/cpu/i486/opt/bits/opt_random.h (limited to 'libstdc++-v3/config') diff --git a/libstdc++-v3/config/cpu/generic/opt/bits/opt_random.h b/libstdc++-v3/config/cpu/generic/opt/bits/opt_random.h new file mode 100644 index 00000000000..38984967614 --- /dev/null +++ b/libstdc++-v3/config/cpu/generic/opt/bits/opt_random.h @@ -0,0 +1,38 @@ +// Optimizations for random number handling, generic version -*- C++ -*- + +// Copyright (C) 2012 Free Software Foundation, Inc. 
+// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file bits/opt_random.h + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. @headername{random} + */ + +#ifndef _BITS_OPT_RANDOM_H +#define _BITS_OPT_RANDOM_H 1 + +#pragma GCC system_header + + + + +#endif // _BITS_OPT_RANDOM_H diff --git a/libstdc++-v3/config/cpu/generic/opt/ext/opt_random.h b/libstdc++-v3/config/cpu/generic/opt/ext/opt_random.h index cc64f18a50e..5e10800143d 100644 --- a/libstdc++-v3/config/cpu/generic/opt/ext/opt_random.h +++ b/libstdc++-v3/config/cpu/generic/opt/ext/opt_random.h @@ -22,7 +22,7 @@ // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see // <http://www.gnu.org/licenses/>. -/** @file ext/random.tcc +/** @file ext/opt_random.h * This is an internal header file, included by other library headers. * Do not attempt to use it directly. 
@headername{ext/random} */ diff --git a/libstdc++-v3/config/cpu/i486/opt/bits/opt_random.h b/libstdc++-v3/config/cpu/i486/opt/bits/opt_random.h new file mode 100644 index 00000000000..9584e275b96 --- /dev/null +++ b/libstdc++-v3/config/cpu/i486/opt/bits/opt_random.h @@ -0,0 +1,219 @@ +// Optimizations for random number functions, x86 version -*- C++ -*- + +// Copyright (C) 2012 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file bits/opt_random.h + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. 
@headername{random}
+ */
+
+#ifndef _BITS_OPT_RANDOM_H
+#define _BITS_OPT_RANDOM_H 1
+
+#include <x86intrin.h>
+
+
+#pragma GCC system_header
+
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+#ifdef __SSE3__
+  /**
+   * SSE3 bulk generator for normal_distribution<double>.
+   *
+   * Fills [__f, __t) with values scaled by __param.stddev() and shifted
+   * by __param.mean().  A value left in _M_saved by an earlier call is
+   * emitted first.  After that, results are produced two at a time: two
+   * 52-bit integers built from __urng output are OR-ed into the bit
+   * pattern of 2.0, giving two doubles in [2, 4); subtracting 3.0 maps
+   * them to [-1, 1).  A pair is kept only when its sum of squares s is
+   * in (0, 1) and is then scaled by sqrt(-2 * log(s) / s) (polar
+   * method).  If a single slot remains at the end, a scalar pair is
+   * generated: one value is stored, the other is parked in _M_saved.
+   *
+   * @param __f      First element to fill.
+   * @param __t      One past the last element to fill.
+   * @param __urng   Source of uniformly distributed integers.
+   * @param __param  Supplies mean() and stddev() for the results.
+   */
+  template<>
+    template<typename _UniformRandomNumberGenerator>
+      void
+      normal_distribution<double>::
+      __generate(typename normal_distribution<double>::result_type* __f,
+                 typename normal_distribution<double>::result_type* __t,
+                 _UniformRandomNumberGenerator& __urng,
+                 const param_type& __param)
+      {
+        typedef uint64_t __uctype;
+
+        if (__f == __t)
+          return;
+
+        // Use up the value saved by a previous odd-length request.
+        if (_M_saved_available)
+          {
+            _M_saved_available = false;
+            *__f++ = _M_saved * __param.stddev() + __param.mean();
+
+            if (__f == __t)
+              return;
+          }
+
+        // 52 one-bits: the mantissa field of an IEEE double.
+        constexpr uint64_t __maskval = UINT64_C(0xfffffffffffff);
+        static const __m128i __mask = _mm_set1_epi64x(__maskval);
+        // Bit pattern of the double 2.0 in both lanes.
+        static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull);
+        static const __m128d __three = _mm_set1_pd(3.0);
+        const __m128d __av = _mm_set1_pd(__param.mean());
+
+        const __uctype __urngmin = __urng.min();
+        const __uctype __urngmax = __urng.max();
+        const __uctype __urngrange = __urngmax - __urngmin;
+        // Number of distinct generator outputs (wraps to 0 for a full
+        // 64-bit range).
+        const __uctype __uerngrange = __urngrange + 1;
+
+        // Vector path: needs room for two results per iteration.
+        while (__f + 1 < __t)
+          {
+            double __le;
+            __m128d __x;
+            do
+              {
+                // Same 128 bits viewed as two integers or two doubles.
+                union
+                {
+                  __m128i __i;
+                  __m128d __d;
+                } __v;
+
+                if (__urngrange > __maskval)
+                  {
+                    // One draw carries more than 52 bits of range.
+                    if (__detail::_Power_of_2(__uerngrange))
+                      // Keep the low 52 bits of each draw.
+                      __v.__i = _mm_and_si128(_mm_set_epi64x(__urng(),
+                                                             __urng()),
+                                              __mask);
+                    else
+                      {
+                        // Reject draws at or above __past, then divide
+                        // down into [0, __maskval].
+                        const __uctype __uerange = __maskval + 1;
+                        const __uctype __scaling = __urngrange / __uerange;
+                        const __uctype __past = __uerange * __scaling;
+                        uint64_t __v1;
+                        do
+                          __v1 = __uctype(__urng()) - __urngmin;
+                        while (__v1 >= __past);
+                        __v1 /= __scaling;
+                        uint64_t __v2;
+                        do
+                          __v2 = __uctype(__urng()) - __urngmin;
+                        while (__v2 >= __past);
+                        __v2 /= __scaling;
+
+                        __v.__i = _mm_set_epi64x(__v1, __v2);
+                      }
+                  }
+                else if (__urngrange == __maskval)
+                  // One draw has exactly the 52-bit range.
+                  // NOTE(review): unlike the scaling branches, __urngmin
+                  // is not subtracted here; this looks like it assumes
+                  // min() == 0 -- confirm.
+                  __v.__i = _mm_set_epi64x(__urng(), __urng());
+                else if ((__urngrange + 2) * __urngrange >= __maskval
+                         && __detail::_Power_of_2(__uerngrange))
+                  {
+                    // Two draws from a power-of-two-sized range are
+                    // combined and masked to 52 bits.
+                    uint64_t __v1 = __urng() * __uerngrange + __urng();
+                    uint64_t __v2 = __urng() * __uerngrange + __urng();
+
+                    __v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2),
+                                            __mask);
+                  }
+                else
+                  {
+                    // Small range: a scaled-down leading draw followed by
+                    // __nrng further draws, each multiplying the running
+                    // value by __uerngrange.
+                    size_t __nrng = 2;
+                    __uctype __high = __maskval / __uerngrange / __uerngrange;
+                    while (__high > __uerngrange)
+                      {
+                        ++__nrng;
+                        __high /= __uerngrange;
+                      }
+                    // NOTE(review): the loop above only guarantees
+                    // __high <= __uerngrange.  If __high ends up
+                    // >= __urngrange, __scaling and __past are 0 and the
+                    // inner "while (__tmp >= __past)" cannot terminate --
+                    // confirm whether such generators are reachable.
+                    const __uctype __highrange = __high + 1;
+                    const __uctype __scaling = __urngrange / __highrange;
+                    const __uctype __past = __highrange * __scaling;
+                    __uctype __tmp;
+
+                    uint64_t __v1;
+                    do
+                      {
+                        do
+                          __tmp = __uctype(__urng()) - __urngmin;
+                        while (__tmp >= __past);
+                        __v1 = __tmp / __scaling;
+                        for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
+                          {
+                            __tmp = __v1;
+                            __v1 *= __uerngrange;
+                            __v1 += __uctype(__urng()) - __urngmin;
+                          }
+                      }
+                    // Retry when the result exceeds 52 bits or is smaller
+                    // than the value held before the last step.
+                    while (__v1 > __maskval || __v1 < __tmp);
+
+                    uint64_t __v2;
+                    do
+                      {
+                        do
+                          __tmp = __uctype(__urng()) - __urngmin;
+                        while (__tmp >= __past);
+                        __v2 = __tmp / __scaling;
+                        for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
+                          {
+                            __tmp = __v2;
+                            __v2 *= __uerngrange;
+                            __v2 += __uctype(__urng()) - __urngmin;
+                          }
+                      }
+                    while (__v2 > __maskval || __v2 < __tmp);
+
+                    __v.__i = _mm_set_epi64x(__v1, __v2);
+                  }
+
+                // Mantissa bits OR 2.0 -> [2, 4); minus 3.0 -> [-1, 1).
+                __v.__i = _mm_or_si128(__v.__i, __two);
+                __x = _mm_sub_pd(__v.__d, __three);
+                __m128d __m = _mm_mul_pd(__x, __x);
+                // Horizontal add: sum of the two squared lanes.
+                __le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m));
+              }
+            while (__le == 0.0 || __le >= 1.0);
+
+            double __mult = (std::sqrt(-2.0 * std::log(__le) / __le)
+                             * __param.stddev());
+
+            __x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av);
+
+            // Unaligned store: __f carries no alignment guarantee here.
+            _mm_storeu_pd(__f, __x);
+            __f += 2;
+          }
+
+        // Scalar tail: exactly one slot left.  Generate a pair, store one
+        // value and keep the other for the next call.
+        if (__f != __t)
+          {
+            result_type __x, __y, __r2;
+
+            __detail::_Adaptor<_UniformRandomNumberGenerator, result_type>
+              __aurng(__urng);
+
+            do
+              {
+                __x = result_type(2.0) * __aurng() - 1.0;
+                __y = result_type(2.0) * __aurng() - 1.0;
+                __r2 = __x * __x + __y * __y;
+              }
+            while (__r2 > 1.0 || __r2 == 0.0);
+
+            const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2);
+            _M_saved = __x * __mult;
+            _M_saved_available = true;
+            *__f = __y * __mult * __param.stddev() + __param.mean();
+          }
+      }
+#endif
+
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace
+
+
+#endif // _BITS_OPT_RANDOM_H
-- 
cgit v1.2.3