From 066dd3838be83741d94bc31a0450ded1af37cf4f Mon Sep 17 00:00:00 2001 From: Brent Huisman <brenthuisman@users.noreply.github.com> Date: Fri, 10 Jun 2022 13:10:58 +0200 Subject: [PATCH] random123 submodule added (#1872) --- .gitmodules | 4 + ext/random123 | 1 + ext/random123/LICENSE | 31 - ext/random123/include/Random123/MicroURNG.hpp | 146 --- .../include/Random123/ReinterpretCtr.hpp | 88 -- ext/random123/include/Random123/aes.h | 398 -------- ext/random123/include/Random123/array.h | 348 ------- ext/random123/include/Random123/ars.h | 204 ---- ext/random123/include/Random123/boxmuller.hpp | 134 --- .../include/Random123/conventional/Engine.hpp | 276 ------ .../Random123/conventional/gsl_cbrng.h | 128 --- .../Random123/features/clangfeatures.h | 93 -- .../Random123/features/compilerfeatures.h | 341 ------- .../include/Random123/features/gccfeatures.h | 263 ------ .../include/Random123/features/iccfeatures.h | 212 ----- .../Random123/features/metalfeatures.h | 111 --- .../include/Random123/features/msvcfeatures.h | 200 ---- .../include/Random123/features/nvccfeatures.h | 125 --- .../Random123/features/open64features.h | 50 - .../Random123/features/openclfeatures.h | 89 -- .../include/Random123/features/pgccfeatures.h | 194 ---- .../include/Random123/features/sse.h | 280 ------ .../Random123/features/sunprofeatures.h | 172 ---- .../include/Random123/features/xlcfeatures.h | 210 ----- .../include/Random123/gsl_microrng.h | 136 --- ext/random123/include/Random123/philox.h | 493 ---------- ext/random123/include/Random123/threefry.h | 870 ------------------ ext/random123/include/Random123/u01fixedpt.h | 200 ---- ext/random123/include/Random123/uniform.hpp | 310 ------- spack/package.py | 1 + 30 files changed, 6 insertions(+), 6102 deletions(-) create mode 160000 ext/random123 delete mode 100644 ext/random123/LICENSE delete mode 100644 ext/random123/include/Random123/MicroURNG.hpp delete mode 100644 ext/random123/include/Random123/ReinterpretCtr.hpp delete mode 100644 ext/random123/include/Random123/aes.h delete mode 100644 ext/random123/include/Random123/array.h delete mode 100644 ext/random123/include/Random123/ars.h delete mode 100644 ext/random123/include/Random123/boxmuller.hpp delete mode 100644 ext/random123/include/Random123/conventional/Engine.hpp delete mode 100644 ext/random123/include/Random123/conventional/gsl_cbrng.h delete mode 100644 ext/random123/include/Random123/features/clangfeatures.h delete mode 100644 ext/random123/include/Random123/features/compilerfeatures.h delete mode 100644 ext/random123/include/Random123/features/gccfeatures.h delete mode 100644 ext/random123/include/Random123/features/iccfeatures.h delete mode 100644 ext/random123/include/Random123/features/metalfeatures.h delete mode 100644 ext/random123/include/Random123/features/msvcfeatures.h delete mode 100644 ext/random123/include/Random123/features/nvccfeatures.h delete mode 100644 ext/random123/include/Random123/features/open64features.h delete mode 100644 ext/random123/include/Random123/features/openclfeatures.h delete mode 100644 ext/random123/include/Random123/features/pgccfeatures.h delete mode 100644 ext/random123/include/Random123/features/sse.h delete mode 100644 ext/random123/include/Random123/features/sunprofeatures.h delete mode 100644 ext/random123/include/Random123/features/xlcfeatures.h delete mode 100644 ext/random123/include/Random123/gsl_microrng.h delete mode 100644 ext/random123/include/Random123/philox.h delete mode 100644 ext/random123/include/Random123/threefry.h delete mode 100644 ext/random123/include/Random123/u01fixedpt.h delete mode 100644 ext/random123/include/Random123/uniform.hpp diff --git a/.gitmodules b/.gitmodules index 2ef0cc17..d5734673 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,6 +10,10 @@ path = ext/fmt url = https://github.com/fmtlib/fmt.git branch = master +[submodule "ext/random123"] + path = ext/random123 + url = https://github.com/DEShawResearch/random123 + branch = main [submodule "ext/json"] path = ext/json url = https://github.com/nlohmann/json.git diff --git a/ext/random123 b/ext/random123 new file mode 160000 index 00000000..726a093c --- /dev/null +++ b/ext/random123 @@ -0,0 +1 @@ +Subproject commit 726a093cd9a73f3ec3c8d7a70ff10ed8efec8d13 diff --git a/ext/random123/LICENSE b/ext/random123/LICENSE deleted file mode 100644 index c6094aca..00000000 --- a/ext/random123/LICENSE +++ /dev/null @@ -1,31 +0,0 @@ -/** @page LICENSE -Copyright 2010-2012, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ diff --git a/ext/random123/include/Random123/MicroURNG.hpp b/ext/random123/include/Random123/MicroURNG.hpp deleted file mode 100644 index 9ea77325..00000000 --- a/ext/random123/include/Random123/MicroURNG.hpp +++ /dev/null @@ -1,146 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __MicroURNG_dot_hpp__ -#define __MicroURNG_dot_hpp__ - -#include <stdexcept> -#include <limits> - -namespace r123{ -/** - Given a CBRNG whose ctr_type has an unsigned integral value_type, - MicroURNG<CBRNG>(c, k) is a type that satisfies the - requirements of a C++11 Uniform Random Number Generator. - - The intended purpose is for a MicroURNG to be passed - as an argument to a C++11 Distribution, e.g., - std::normal_distribution. See examples/MicroURNG.cpp. - - The MicroURNG functor has a period of "only" - - ctr_type.size()*2^32, - - after which it will silently repeat. - - The high 32 bits of the highest word in the counter c, passed to - the constructor must be zero. MicroURNG uses these bits to - "count". - - Older versions of the library permitted a second template - parameter by which the caller could control the number of - bits devoted to the URNG's internal counter. This flexibility - has been disabled because URNGs created with different - numbers of counter bits could, conceivably "collide". - -\code - typedef ?someCBRNG? RNG; - RNG::ctr_type c = ...; // under application control - RNG::key_type k = ...; // - std::normal_distribution<float> nd; - MicroURNG<RNG> urng(c, k); - for(???){ - ... - nd(urng); // may be called several hundred times with BITS=10 - ... - } -\endcode -*/ - -template<typename CBRNG> -class MicroURNG{ - // According to C++11, a URNG requires only a result_type, - // operator()(), min() and max() methods. Everything else - // (ctr_type, key_type, reset() method, etc.) is "value added" - // for the benefit of users that "know" that they're dealing with - // a MicroURNG. -public: - typedef CBRNG cbrng_type; - static const int BITS = 32; - typedef typename cbrng_type::ctr_type ctr_type; - typedef typename cbrng_type::key_type key_type; - typedef typename cbrng_type::ukey_type ukey_type; - typedef typename ctr_type::value_type result_type; - - R123_STATIC_ASSERT( std::numeric_limits<result_type>::digits >= BITS, "The result_type must have at least 32 bits" ); - - result_type operator()(){ - if(last_elem == 0){ - // jam n into the high bits of c - const size_t W = std::numeric_limits<result_type>::digits; - ctr_type c = c0; - c[c0.size()-1] |= n<<(W-BITS); - rdata = b(c,k); - n++; - last_elem = rdata.size(); - } - return rdata[--last_elem]; - } - MicroURNG(cbrng_type _b, ctr_type _c0, ukey_type _uk) : b(_b), c0(_c0), k(_uk), n(0), last_elem(0) { - chkhighbits(); - } - MicroURNG(ctr_type _c0, ukey_type _uk) : b(), c0(_c0), k(_uk), n(0), last_elem(0) { - chkhighbits(); - } - - // _Min and _Max work around a bug in the library shipped with MacOS Xcode 4.5.2. - // See the commment in conventional/Engine.hpp. - const static result_type _Min = 0; - const static result_type _Max = ~((result_type)0); - - static R123_CONSTEXPR result_type min R123_NO_MACRO_SUBST () { return _Min; } - static R123_CONSTEXPR result_type max R123_NO_MACRO_SUBST () { return _Max; } - // extra methods: - const ctr_type& counter() const{ return c0; } - void reset(ctr_type _c0, ukey_type _uk){ - c0 = _c0; - chkhighbits(); - k = _uk; - n = 0; - last_elem = 0; - } - -private: - cbrng_type b; - ctr_type c0; - key_type k; - R123_ULONG_LONG n; - size_t last_elem; - ctr_type rdata; - void chkhighbits(){ - result_type r = c0[c0.size()-1]; - result_type mask = ((uint64_t)std::numeric_limits<result_type>::max R123_NO_MACRO_SUBST ())>>BITS; - if((r&mask) != r) - throw std::runtime_error("MicroURNG: c0, does not have high bits clear"); - } -}; -} // namespace r123 -#endif diff --git a/ext/random123/include/Random123/ReinterpretCtr.hpp b/ext/random123/include/Random123/ReinterpretCtr.hpp deleted file mode 100644 index 164a38b0..00000000 --- a/ext/random123/include/Random123/ReinterpretCtr.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __ReinterpretCtr_dot_hpp__ -#define __ReinterpretCtr_dot_hpp__ - -#include "features/compilerfeatures.h" -#include <cstring> - -namespace r123{ -/*! - ReinterpretCtr uses memcpy to map back and forth - between a CBRNG's ctr_type and the specified ToType. For example, - after: - - typedef ReinterpretCtr<r123array4x32, Philox2x64> G; - - G is a bona fide CBRNG with ctr_type r123array4x32. - - WARNING: ReinterpretCtr is endian dependent. The - values returned by G, declared as above, - will depend on the endianness of the machine on which it runs. - */ - -template <typename ToType, typename CBRNG> -struct ReinterpretCtr{ - typedef ToType ctr_type; - typedef typename CBRNG::key_type key_type; - typedef typename CBRNG::ctr_type bctype; - typedef typename CBRNG::ukey_type ukey_type; - R123_STATIC_ASSERT(sizeof(ToType) == sizeof(bctype) && sizeof(typename bctype::value_type) != 16, - "ReinterpretCtr: sizeof(ToType) is not the same as sizeof(CBRNG::ctr_type) or CBRNG::ctr_type::value_type looks like it might be __m128i"); - // It's amazingly difficult to safely do conversions with __m128i. - // If we use the operator() implementation below with a CBRNG - // whose ctr_type is r123array1xm128i, gcc4.6 optimizes away the - // memcpys, inlines the operator()(c,k), and produces assembly - // language that ends with an aesenclast instruction with a - // destination operand pointing to an unaligned memory address ... - // Segfault! See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444 - // MSVC also produces code that crashes. We suspect a - // similar mechanism but haven't done the debugging necessary to - // be sure. We were able to 'fix' gcc4.6 by making bc a mutable - // data member rather than declaring it in the scope of - // operator(). That didn't fix the MSVC problems, though. - // - // Conclusion - don't touch __m128i, at least for now. The - // easiest (but highly imprecise) way to do that is the static - // assertion above that rejects bctype::value_types of size 16. - - // Sep 2011. - ctr_type operator()(ctr_type c, key_type k){ - bctype bc; - std::memcpy(&bc, &c, sizeof(c)); - CBRNG b; - bc = b(bc, k); - std::memcpy(&c, &bc, sizeof(bc)); - return c; - } -}; -} // namespace r123 -#endif diff --git a/ext/random123/include/Random123/aes.h b/ext/random123/include/Random123/aes.h deleted file mode 100644 index 3095fac3..00000000 --- a/ext/random123/include/Random123/aes.h +++ /dev/null @@ -1,398 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __Random123_aes_dot_hpp__ -#define __Random123_aes_dot_hpp__ - -#include "features/compilerfeatures.h" -#include "array.h" - -/* Implement a bona fide AES block cipher. It's minimally -// checked against the test vector in FIPS-197 in ut_aes.cpp. */ -#if R123_USE_AES_NI - -/** @ingroup AESNI */ -typedef struct r123array1xm128i aesni1xm128i_ctr_t; -/** @ingroup AESNI */ -typedef struct r123array1xm128i aesni1xm128i_ukey_t; -/** @ingroup AESNI */ -typedef struct r123array4x32 aesni4x32_ukey_t; -/** @ingroup AESNI */ -enum r123_enum_aesni1xm128i { aesni1xm128i_rounds = 10 }; - -/** \cond HIDDEN_FROM_DOXYGEN */ -R123_STATIC_INLINE __m128i AES_128_ASSIST (__m128i temp1, __m128i temp2) { - __m128i temp3; - temp2 = _mm_shuffle_epi32 (temp2 ,0xff); - temp3 = _mm_slli_si128 (temp1, 0x4); - temp1 = _mm_xor_si128 (temp1, temp3); - temp3 = _mm_slli_si128 (temp3, 0x4); - temp1 = _mm_xor_si128 (temp1, temp3); - temp3 = _mm_slli_si128 (temp3, 0x4); - temp1 = _mm_xor_si128 (temp1, temp3); - temp1 = _mm_xor_si128 (temp1, temp2); - return temp1; -} - -R123_STATIC_INLINE void aesni1xm128iexpand(aesni1xm128i_ukey_t uk, __m128i ret[11]) -{ - __m128i rkey = uk.v[0].m; - __m128i tmp2; - - ret[0] = rkey; - tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1); - rkey = AES_128_ASSIST(rkey, tmp2); - ret[1] = rkey; - - tmp2 = _mm_aeskeygenassist_si128(rkey, 0x2); - rkey = AES_128_ASSIST(rkey, tmp2); - ret[2] = rkey; - - tmp2 = _mm_aeskeygenassist_si128(rkey, 0x4); - rkey = AES_128_ASSIST(rkey, tmp2); - ret[3] = rkey; - - tmp2 = _mm_aeskeygenassist_si128(rkey, 0x8); - rkey = AES_128_ASSIST(rkey, tmp2); - ret[4] = rkey; - - tmp2 = _mm_aeskeygenassist_si128(rkey, 0x10); - rkey = AES_128_ASSIST(rkey, tmp2); - ret[5] = rkey; - - tmp2 = _mm_aeskeygenassist_si128(rkey, 0x20); - rkey = AES_128_ASSIST(rkey, tmp2); - ret[6] = rkey; - - tmp2 = _mm_aeskeygenassist_si128(rkey, 0x40); - rkey = AES_128_ASSIST(rkey, tmp2); - ret[7] = rkey; - - tmp2 = _mm_aeskeygenassist_si128(rkey, 0x80); - rkey = AES_128_ASSIST(rkey, tmp2); - ret[8] = rkey; - - tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1b); - rkey = AES_128_ASSIST(rkey, tmp2); - ret[9] = rkey; - - tmp2 = _mm_aeskeygenassist_si128(rkey, 0x36); - rkey = AES_128_ASSIST(rkey, tmp2); - ret[10] = rkey; -} -/** \endcond */ - -#ifdef __cplusplus -/** @ingroup AESNI */ -struct aesni1xm128i_key_t{ - __m128i k[11]; - aesni1xm128i_key_t(){ - aesni1xm128i_ukey_t uk; - uk.v[0].m = _mm_setzero_si128(); - aesni1xm128iexpand(uk, k); - } - aesni1xm128i_key_t(const aesni1xm128i_ukey_t& uk){ - aesni1xm128iexpand(uk, k); - } - aesni1xm128i_key_t(const aesni4x32_ukey_t& uk){ - aesni1xm128i_ukey_t uk128; - uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]); - aesni1xm128iexpand(uk128, k); - } - aesni1xm128i_key_t& operator=(const aesni1xm128i_ukey_t& uk){ - aesni1xm128iexpand(uk, k); - return *this; - } - aesni1xm128i_key_t& operator=(const aesni4x32_ukey_t& uk){ - aesni1xm128i_ukey_t uk128; - uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]); - aesni1xm128iexpand(uk128, k); - return *this; - } - bool operator==(const aesni1xm128i_key_t& rhs) const{ - for(int i=0; i<11; ++i){ - // Sigh... No r123m128i(__m128i) constructor! - r123m128i li; li.m = k[i]; - r123m128i ri; ri.m = rhs.k[i]; - if( li != ri ) return false; - } - return true; - } - bool operator!=(const aesni1xm128i_key_t& rhs) const{ - return !(*this == rhs); - } - friend std::ostream& operator<<(std::ostream& os, const aesni1xm128i_key_t& v){ - r123m128i ki; - for(int i=0; i<10; ++i){ - ki.m = v.k[i]; - os << ki << " "; - } - ki.m = v.k[10]; - return os << ki; - } - friend std::istream& operator>>(std::istream& is, aesni1xm128i_key_t& v){ - r123m128i ki; - for(int i=0; i<11; ++i){ - is >> ki; - v.k[i] = ki; - } - return is; - } -}; -#else -typedef struct { - __m128i k[11]; -}aesni1xm128i_key_t; - -/** @ingroup AESNI */ -R123_STATIC_INLINE aesni1xm128i_key_t aesni1xm128ikeyinit(aesni1xm128i_ukey_t uk){ - aesni1xm128i_key_t ret; - aesni1xm128iexpand(uk, ret.k); - return ret; -} -#endif - -/** @ingroup AESNI */ -R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i(aesni1xm128i_ctr_t in, aesni1xm128i_key_t k) { - __m128i x = _mm_xor_si128(k.k[0], in.v[0].m); - x = _mm_aesenc_si128(x, k.k[1]); - x = _mm_aesenc_si128(x, k.k[2]); - x = _mm_aesenc_si128(x, k.k[3]); - x = _mm_aesenc_si128(x, k.k[4]); - x = _mm_aesenc_si128(x, k.k[5]); - x = _mm_aesenc_si128(x, k.k[6]); - x = _mm_aesenc_si128(x, k.k[7]); - x = _mm_aesenc_si128(x, k.k[8]); - x = _mm_aesenc_si128(x, k.k[9]); - x = _mm_aesenclast_si128(x, k.k[10]); - { - aesni1xm128i_ctr_t ret; - ret.v[0].m = x; - return ret; - } -} - -/** @ingroup AESNI */ -R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i_R(unsigned R, aesni1xm128i_ctr_t in, aesni1xm128i_key_t k){ - R123_ASSERT(R==10); - return aesni1xm128i(in, k); -} - - -/** @ingroup AESNI */ -typedef struct r123array4x32 aesni4x32_ctr_t; -/** @ingroup AESNI */ -typedef aesni1xm128i_key_t aesni4x32_key_t; -/** @ingroup AESNI */ -enum r123_enum_aesni4x32 { aesni4x32_rounds = 10 }; -/** @ingroup AESNI */ -R123_STATIC_INLINE aesni4x32_key_t aesni4x32keyinit(aesni4x32_ukey_t uk){ - aesni1xm128i_ukey_t uk128; - aesni4x32_key_t ret; - uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]); - aesni1xm128iexpand(uk128, ret.k); - return ret; -} - -/** @ingroup AESNI */ -/** The aesni4x32_R function provides a C API to the @ref AESNI "AESNI" CBRNG, allowing the number of rounds to be specified explicitly **/ -R123_STATIC_INLINE aesni4x32_ctr_t aesni4x32_R(unsigned int Nrounds, aesni4x32_ctr_t c, aesni4x32_key_t k){ - aesni1xm128i_ctr_t c128; - c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]); - c128 = aesni1xm128i_R(Nrounds, c128, k); - _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m); - return c; -} - -#define aesni4x32_rounds aesni1xm128i_rounds - -/** The aesni4x32 macro provides a C API to the @ref AESNI "AESNI" CBRNG, uses the default number of rounds i.e. \c aesni4x32_rounds **/ -/** @ingroup AESNI */ -#define aesni4x32(c,k) aesni4x32_R(aesni4x32_rounds, c, k) - -#ifdef __cplusplus -namespace r123{ -/** -@defgroup AESNI ARS and AESNI Classes and Typedefs - -The ARS4x32, ARS1xm128i, AESNI4x32 and AESNI1xm128i classes export the member functions, typedefs and -operator overloads required by a @ref CBRNG "CBRNG" class. - -ARS1xm128i and AESNI1xm128i are based on the AES block cipher and rely on the AES-NI hardware instructions -available on some some new (2011) CPUs. - -The ARS1xm128i CBRNG and the use of AES for random number generation are described in -<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers: As Easy as 1, 2, 3</i> </a>. -Although it uses some cryptographic primitives, ARS1xm128i uses a cryptographically weak key schedule and is \b not suitable for cryptographic use. - -@class AESNI1xm128i -@ingroup AESNI -AESNI exports the member functions, typedefs and operator overloads required by a @ref CBRNG class. - -AESNI1xm128i uses the crypotgraphic AES round function, including the cryptographic key schedule. - -In contrast to the other CBRNGs in the Random123 library, the AESNI1xm128i_R::key_type is opaque -and is \b not identical to the AESNI1xm128i_R::ukey_type. Creating a key_type, using either the constructor -or assignment operator, is significantly more time-consuming than running the bijection (hundreds -of clock cycles vs. tens of clock cycles). - -AESNI1xm128i is only available when the feature-test macro R123_USE_AES_NI is true, which -should occur only when the compiler is configured to generate AES-NI instructions (or -when defaults are overridden by compile-time, compiler-command-line options). - -As of September 2011, the authors know of no statistical flaws with AESNI1xm128i. It -would be an event of major cryptographic note if any such flaws were ever found. -*/ -struct AESNI1xm128i{ - typedef aesni1xm128i_ctr_t ctr_type; - typedef aesni1xm128i_ukey_t ukey_type; - typedef aesni1xm128i_key_t key_type; - static const unsigned int rounds=10; - ctr_type operator()(ctr_type ctr, key_type key) const{ - return aesni1xm128i(ctr, key); - } -}; - -/* @class AESNI4x32 */ -struct AESNI4x32{ - typedef aesni4x32_ctr_t ctr_type; - typedef aesni4x32_ukey_t ukey_type; - typedef aesni4x32_key_t key_type; - static const unsigned int rounds=10; - ctr_type operator()(ctr_type ctr, key_type key) const{ - return aesni4x32(ctr, key); - } -}; - -/** @ingroup AESNI - @class AESNI1xm128i_R - -AESNI1xm128i_R is provided for completeness, but is only instantiable with ROUNDS=10, in -which case it is identical to AESNI1xm128i */ -template <unsigned ROUNDS=10> -struct AESNI1xm128i_R : public AESNI1xm128i{ - R123_STATIC_ASSERT(ROUNDS==10, "AESNI1xm128i_R<R> is only valid with R=10"); -}; - -/** @class AESNI4x32_R **/ -template <unsigned ROUNDS=10> -struct AESNI4x32_R : public AESNI4x32{ - R123_STATIC_ASSERT(ROUNDS==10, "AESNI4x32_R<R> is only valid with R=10"); -}; -} // namespace r123 -#endif /* __cplusplus */ - -#endif /* R123_USE_AES_NI */ - -#if R123_USE_AES_OPENSSL -#include "string.h" -#include <openssl/aes.h> -typedef struct r123array16x8 aesopenssl16x8_ctr_t; -typedef struct r123array16x8 aesopenssl16x8_ukey_t; -#ifdef __cplusplus -struct aesopenssl16x8_key_t{ - AES_KEY k; - aesopenssl16x8_key_t(){ - aesopenssl16x8_ukey_t ukey={{}}; - AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k); - } - aesopenssl16x8_key_t(const aesopenssl16x8_ukey_t& ukey){ - AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k); - } - aesopenssl16x8_key_t& operator=(const aesopenssl16x8_ukey_t& ukey){ - AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k); - return *this; - } - bool operator==(const aesopenssl16x8_key_t& rhs) const{ - return (k.rounds == rhs.k.rounds) && 0==::memcmp(&k.rd_key[0], &rhs.k.rd_key[0], (k.rounds+1) * 4 * sizeof(uint32_t)); - } - bool operator!=(const aesopenssl16x8_key_t& rhs) const{ - return !(*this == rhs); - } - friend std::ostream& operator<<(std::ostream& os, const aesopenssl16x8_key_t& v){ - os << v.k.rounds; - const unsigned int *p = &v.k.rd_key[0]; - for(int i=0; i<(v.k.rounds+1); ++i){ - os << " " << p[0] << " " << p[1] << " " << p[2] << " " << p[3]; - p += 4; - } - return os; - } - friend std::istream& operator>>(std::istream& is, aesopenssl16x8_key_t& v){ - is >> v.k.rounds; - unsigned int *p = &v.k.rd_key[0]; - for(int i=0; i<(v.k.rounds+1); ++i){ - is >> p[0] >> p[1] >> p[2] >> p[3]; - p += 4; - } - return is; - } -}; -#else -typedef struct aesopenssl16x8_key_t{ - AES_KEY k; -}aesopenssl16x8_key_t; -R123_STATIC_INLINE struct aesopenssl16x8_key_t aesopenssl16x8keyinit(aesopenssl16x8_ukey_t uk){ - aesopenssl16x8_key_t ret; - AES_set_encrypt_key((const unsigned char *)&uk.v[0], 128, &ret.k); - return ret; -} -#endif - -R123_STATIC_INLINE R123_FORCE_INLINE(aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key)); -R123_STATIC_INLINE -aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key){ - aesopenssl16x8_ctr_t ret; - AES_encrypt((const unsigned char*)&ctr.v[0], (unsigned char *)&ret.v[0], &key.k); - return ret; -} - -#define aesopenssl16x8_rounds aesni4x32_rounds -#define aesopenssl16x8(c,k) aesopenssl16x8_R(aesopenssl16x8_rounds) - -#ifdef __cplusplus -namespace r123{ -struct AESOpenSSL16x8{ - typedef aesopenssl16x8_ctr_t ctr_type; - typedef aesopenssl16x8_key_t key_type; - typedef aesopenssl16x8_ukey_t ukey_type; - static const unsigned int rounds=10; - ctr_type operator()(const ctr_type& in, const key_type& k){ - ctr_type out; - AES_encrypt((const unsigned char *)&in[0], (unsigned char *)&out[0], &k.k); - return out; - } -}; -} // namespace r123 -#endif /* __cplusplus */ -#endif /* R123_USE_AES_OPENSSL */ - -#endif diff --git a/ext/random123/include/Random123/array.h b/ext/random123/include/Random123/array.h deleted file mode 100644 index c560c3fe..00000000 --- a/ext/random123/include/Random123/array.h +++ /dev/null @@ -1,348 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef _r123array_dot_h__ -#define _r123array_dot_h__ -#include "features/compilerfeatures.h" -#include "features/sse.h" - -#if !defined(__cplusplus) || defined(__METAL_MACOS__) -#define CXXMETHODS(_N, W, T) -#define CXXOVERLOADS(_N, W, T) -#define CXXMETHODS_REQUIRING_STL -#else - -#include <stddef.h> -#include <algorithm> -#include <stdexcept> -#include <iterator> -#include <limits> -#include <iostream> - -/** @defgroup arrayNxW The r123arrayNxW classes - - Each of the r123arrayNxW is a fixed size array of N W-bit unsigned integers. - It is functionally equivalent to the C++11 std::array<N, uintW_t>, - but does not require C++11 features or libraries. - - In addition to meeting most of the requirements of a Container, - it also has a member function, incr(), which increments the zero-th - element and carrys overflows into higher indexed elements. Thus, - by using incr(), sequences of up to 2^(N*W) distinct values - can be produced. - - If SSE is supported by the compiler, then the class - r123array1xm128i is also defined, in which the data member is an - array of one r123m128i object. - - When compiling with __CUDA_ARCH__ defined, the reverse iterator - methods (rbegin, rend, crbegin, crend) are not defined because - CUDA does not support std::reverse_iterator. - -*/ - -/** @cond HIDDEN_FROM_DOXYGEN */ - -template <typename value_type> -inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){ - value_type v=0; - for(size_t i=0; i<(3+sizeof(value_type))/4; ++i) - v |= ((value_type)(*p32++)) << (32*i); - return v; -} - -/** @endcond */ - -#ifdef __CUDA_ARCH__ -/* CUDA can't handle std::reverse_iterator. We *could* implement it - ourselves, but let's not bother until somebody really feels a need - to reverse-iterate through an r123array */ -#define CXXMETHODS_REQUIRING_STL -#else -#define CXXMETHODS_REQUIRING_STL \ - public: \ - typedef std::reverse_iterator<iterator> reverse_iterator; \ - typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \ - R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \ - R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \ - R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \ - R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \ - R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \ - R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } -#endif - -// Work-alike methods and typedefs modeled on std::array: -#define CXXMETHODS(_N, W, T) \ - typedef T value_type; \ - typedef T* iterator; \ - typedef const T* const_iterator; \ - typedef value_type& reference; \ - typedef const value_type& const_reference; \ - typedef size_t size_type; \ - typedef ptrdiff_t difference_type; \ - typedef T* pointer; \ - typedef const T* const_pointer; \ - /* Boost.array has static_size. C++11 specializes tuple_size */ \ - enum {static_size = _N}; \ - R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \ - R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \ - R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \ - R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \ - R123_CUDA_DEVICE size_type size() const { return _N; } \ - R123_CUDA_DEVICE size_type max_size() const { return _N; } \ - R123_CUDA_DEVICE bool empty() const { return _N==0; }; \ - R123_CUDA_DEVICE iterator begin() { return &v[0]; } \ - R123_CUDA_DEVICE iterator end() { return &v[_N]; } \ - R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \ - R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \ - R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \ - R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \ - R123_CUDA_DEVICE pointer data(){ return &v[0]; } \ - R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \ - R123_CUDA_DEVICE reference front(){ return v[0]; } \ - R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \ - R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \ - R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \ - R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \ - /* CUDA3 does not have std::equal */ \ - for (size_t i = 0; i < _N; ++i) \ - if (v[i] != rhs.v[i]) return false; \ - return true; \ - } \ - R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \ - /* CUDA3 does not have std::fill_n */ \ - R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \ - R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \ - /* CUDA3 does not have std::swap_ranges */ \ - for (size_t i = 0; i < _N; ++i) { \ - T tmp = v[i]; \ - v[i] = rhs.v[i]; \ - rhs.v[i] = tmp; \ - } \ - } \ - R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ \ - /* This test is tricky because we're trying to avoid spurious \ - complaints about illegal shifts, yet still be compile-time \ - evaulated. */ \ - if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \ - return incr_carefully(n); \ - if(n==1){ \ - ++v[0]; \ - if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \ - }else{ \ - v[0] += n; \ - if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \ - } \ - /* We expect that the N==?? tests will be \ - constant-folded/optimized away by the compiler, so only the \ - overflow tests (!!v[i]) remain to be done at runtime. For \ - small values of N, it would be better to do this as an \ - uncondtional sequence of adc. An experiment/optimization \ - for another day... \ - N.B. The weird subscripting: v[_N>3?3:0] is to silence \ - a spurious error from icpc \ - */ \ - ++v[_N>1?1:0]; \ - if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \ - ++v[_N>2?2:0]; \ - if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \ - ++v[_N>3?3:0]; \ - for(size_t i=4; i<_N; ++i){ \ - if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \ - ++v[i]; \ - } \ - return *this; \ - } \ - /* seed(SeedSeq) would be a constructor if having a constructor */ \ - /* didn't cause headaches with defaults */ \ - template <typename SeedSeq> \ - R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \ - r123array##_N##x##W ret; \ - const size_t Ngen = _N*((3+sizeof(value_type))/4); \ - uint32_t u32[Ngen]; \ - uint32_t *p32 = &u32[0]; \ - ss.generate(&u32[0], &u32[Ngen]); \ - for(size_t i=0; i<_N; ++i){ \ - ret.v[i] = assemble_from_u32<value_type>(p32); \ - p32 += (3+sizeof(value_type))/4; \ - } \ - return ret; \ - } \ -protected: \ - R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \ - /* n may be greater than the maximum value of a single value_type */ \ - value_type vtn; \ - vtn = n; \ - v[0] += n; \ - const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \ - for(size_t i=1; i<_N; ++i){ \ - if(rshift){ \ - n >>= rshift; \ - }else{ \ - n=0; \ - } \ - if( v[i-1] < vtn ) \ - ++n; \ - if( n==0 ) break; \ - vtn = n; \ - v[i] += n; \ - } \ - return *this; \ - } \ - -/** @cond HIDDEN_FROM_DOXYGEN */ - -// There are several tricky considerations for the insertion and extraction -// operators: -// - we would like to be able to print r123array16x8 as a sequence of 16 integers, -// not as 16 bytes. -// - we would like to be able to print r123array1xm128i. -// - we do not want an int conversion operator in r123m128i because it causes -// lots of ambiguity problems with automatic promotions. -// Solution: r123arrayinsertable and r123arrayextractable - -template<typename T> -struct r123arrayinsertable{ - const T& v; - r123arrayinsertable(const T& t_) : v(t_) {} - friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<T>& t){ - return os << t.v; - } -}; - -template<> -struct r123arrayinsertable<uint8_t>{ - const uint8_t& v; - r123arrayinsertable(const uint8_t& t_) : v(t_) {} - friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<uint8_t>& t){ - return os << (int)t.v; - } -}; - -template<typename T> -struct r123arrayextractable{ - T& v; - r123arrayextractable(T& t_) : v(t_) {} - friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){ - return is >> t.v; - } -}; - -template<> -struct r123arrayextractable<uint8_t>{ - uint8_t& v; - r123arrayextractable(uint8_t& t_) : v(t_) {} - friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){ - int i; - is >> i; - t.v = i; - return is; - } -}; -/** @endcond */ - -#define CXXOVERLOADS(_N, W, T) \ - \ -inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \ - os << r123arrayinsertable<T>(a.v[0]); \ - for(size_t i=1; i<_N; ++i) \ - os << " " << r123arrayinsertable<T>(a.v[i]); \ - return os; \ -} \ - \ -inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \ - for(size_t i=0; i<_N; ++i){ \ - r123arrayextractable<T> x(a.v[i]); \ - is >> x; \ - } \ - return is; \ -} \ - \ -namespace r123{ \ - typedef r123array##_N##x##W Array##_N##x##W; \ -} - -#endif /* __cplusplus */ - -/* _r123array_tpl expands to a declaration of struct r123arrayNxW. - - In C, it's nothing more than a struct containing an array of N - objects of type T. - - In C++ it's the same, but endowed with an assortment of member - functions, typedefs and friends. In C++, r123arrayNxW looks a lot - like std::array<T,N>, has most of the capabilities of a container, - and satisfies the requirements outlined in compat/Engine.hpp for - counter and key types. ArrayNxW, in the r123 namespace is - a typedef equivalent to r123arrayNxW. -*/ - -#define _r123array_tpl(_N, W, T) \ - /** @ingroup arrayNxW */ \ - /** @see arrayNxW */ \ -struct r123array##_N##x##W{ \ - T v[_N]; \ - CXXMETHODS(_N, W, T) \ - CXXMETHODS_REQUIRING_STL \ -}; \ - \ -CXXOVERLOADS(_N, W, T) - - -_r123array_tpl(1, 32, uint32_t) /* r123array1x32 */ -_r123array_tpl(2, 32, uint32_t) /* r123array2x32 */ -_r123array_tpl(4, 32, uint32_t) /* r123array4x32 */ -_r123array_tpl(8, 32, uint32_t) /* r123array8x32 */ - -#if R123_USE_64BIT -_r123array_tpl(1, 64, uint64_t) /* r123array1x64 */ -_r123array_tpl(2, 64, uint64_t) /* r123array2x64 */ -_r123array_tpl(4, 64, uint64_t) /* r123array4x64 */ -#endif - -_r123array_tpl(16, 8, uint8_t) /* r123array16x8 for ARSsw, AESsw */ - -#if R123_USE_SSE -_r123array_tpl(1, m128i, r123m128i) /* r123array1x128i for ARSni, AESni */ -#endif - -/* In C++, it's natural to use sizeof(a::value_type), but in C it's - pretty convoluted to figure out the width of the value_type of an - r123arrayNxW: -*/ -#define R123_W(a) (8*sizeof(((a *)0)->v[0])) - -/** @namespace r123 - Most of the Random123 C++ API is contained in the r123 namespace. -*/ - -#endif - diff --git a/ext/random123/include/Random123/ars.h b/ext/random123/include/Random123/ars.h deleted file mode 100644 index a027b6fe..00000000 --- a/ext/random123/include/Random123/ars.h +++ /dev/null @@ -1,204 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __Random123_ars_dot_hpp__ -#define __Random123_ars_dot_hpp__ - -#include "features/compilerfeatures.h" -#include "array.h" - -#if R123_USE_AES_NI - -#ifndef ARS1xm128i_DEFAULT_ROUNDS -#define ARS1xm128i_DEFAULT_ROUNDS 7 -#endif - -/** @ingroup AESNI */ -enum r123_enum_ars1xm128i {ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS}; - -/* ARS1xm128i with Weyl keys. Fast, and Crush-resistant, but NOT CRYPTO. */ -/** @ingroup AESNI */ -typedef struct r123array1xm128i ars1xm128i_ctr_t; -/** @ingroup AESNI */ -typedef struct r123array1xm128i ars1xm128i_key_t; -/** @ingroup AESNI */ -typedef struct r123array1xm128i ars1xm128i_ukey_t; -/** @ingroup AESNI */ -R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) { return uk; } -/** @ingroup AESNI */ -R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){ - __m128i kweyl = _mm_set_epi64x(R123_64BIT(0xBB67AE8584CAA73B), /* sqrt(3) - 1.0 */ - R123_64BIT(0x9E3779B97F4A7C15)); /* golden ratio */ - /* N.B. the aesenc instructions do the xor *after* - // so if we want to follow the AES pattern, we - // have to do the initial xor explicitly */ - __m128i kk = k.v[0].m; - __m128i v = _mm_xor_si128(in.v[0].m, kk); - ars1xm128i_ctr_t ret; - R123_ASSERT(Nrounds<=10); - if( Nrounds>1 ){ - kk = _mm_add_epi64(kk, kweyl); - v = _mm_aesenc_si128(v, kk); - } - if( Nrounds>2 ){ - kk = _mm_add_epi64(kk, kweyl); - v = _mm_aesenc_si128(v, kk); - } - if( Nrounds>3 ){ - kk = _mm_add_epi64(kk, kweyl); - v = _mm_aesenc_si128(v, kk); - } - if( Nrounds>4 ){ - kk = _mm_add_epi64(kk, kweyl); - v = _mm_aesenc_si128(v, kk); - } - if( Nrounds>5 ){ - kk = _mm_add_epi64(kk, kweyl); - v = _mm_aesenc_si128(v, kk); - } - if( Nrounds>6 ){ - kk = _mm_add_epi64(kk, kweyl); - v = _mm_aesenc_si128(v, kk); - } - if( Nrounds>7 ){ - kk = _mm_add_epi64(kk, kweyl); - v = _mm_aesenc_si128(v, kk); - } - if( Nrounds>8 ){ - kk = _mm_add_epi64(kk, kweyl); - v = _mm_aesenc_si128(v, kk); - } - if( Nrounds>9 ){ - kk = _mm_add_epi64(kk, kweyl); - v = _mm_aesenc_si128(v, kk); - } - kk = _mm_add_epi64(kk, kweyl); - v = _mm_aesenclast_si128(v, kk); - ret.v[0].m = v; - return ret; -} - -/** @def ars1xm128i -@ingroup AESNI -The ars1mx128i macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars1xm128i_rounds **/ -#define ars1xm128i(c,k) ars1xm128i_R(ars1xm128i_rounds, c, k) - -/** @ingroup AESNI */ -typedef struct r123array4x32 ars4x32_ctr_t; -/** @ingroup AESNI */ -typedef struct r123array4x32 ars4x32_key_t; -/** @ingroup AESNI */ -typedef struct r123array4x32 ars4x32_ukey_t; -/** @ingroup AESNI */ -enum r123_enum_ars4x32 {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS}; -/** @ingroup AESNI */ -R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) { return uk; } -/** @ingroup AESNI */ -R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){ - ars1xm128i_ctr_t c128; - ars1xm128i_key_t k128; - c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]); - k128.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]); - c128 = ars1xm128i_R(Nrounds, c128, k128); - _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m); - return c; -} - -/** @def ars4x32 -@ingroup AESNI -The ars4x32 macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars4x32_rounds **/ -#define ars4x32(c,k) ars4x32_R(ars4x32_rounds, c, k) - -#ifdef __cplusplus -namespace r123{ -/** -@ingroup AESNI - -ARS1xm128i_R exports the member functions, typedefs and operator overloads required by a @ref CBRNG class. - -ARS1xm128i uses the crypotgraphic AES round function, but a @b non-cryptographc key schedule -to save time and space. - -ARS1xm128i is only available when the feature-test macro R123_USE_AES_NI is true, which -should occur only when the compiler is configured to generate AES-NI instructions (or -when defaults are overridden by compile-time, compiler-command-line options). - -The template argument, ROUNDS, is the number of times the ARS round -functions will be applied. - -As of September 2011, the authors know of no statistical flaws with -ROUNDS=5 or more. - -@class ARS1xm128i_R - -*/ -template<unsigned int ROUNDS> -struct ARS1xm128i_R{ - typedef ars1xm128i_ctr_t ctr_type; - typedef ars1xm128i_key_t key_type; - typedef ars1xm128i_key_t ukey_type; - static const unsigned int rounds=ROUNDS; - R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){ - return ars1xm128i_R(ROUNDS, ctr, key); - } -}; - -/** @class ARS4x32_R - @ingroup AESNI -*/ - -template<unsigned int ROUNDS> -struct ARS4x32_R{ - typedef ars4x32_ctr_t ctr_type; - typedef ars4x32_key_t key_type; - typedef ars4x32_key_t ukey_type; - static const unsigned int rounds=ROUNDS; - R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){ - return ars4x32_R(ROUNDS, ctr, key); - } -}; -/** -@ingroup AESNI - -@class ARS1xm128i_R - ARS1xm128i is equivalent to ARS1xm128i_R<7>. With 7 rounds, - the ARS1xm128i CBRNG has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. */ -typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i; -typedef ARS4x32_R<ars4x32_rounds> ARS4x32; -} // namespace r123 - -#endif /* __cplusplus */ - -#endif /* R123_USE_AES_NI */ - -#endif diff --git a/ext/random123/include/Random123/boxmuller.hpp b/ext/random123/include/Random123/boxmuller.hpp deleted file mode 100644 index 43b6db49..00000000 --- a/ext/random123/include/Random123/boxmuller.hpp +++ /dev/null @@ -1,134 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -// This file implements the Box-Muller method for generating gaussian -// random variables (GRVs). Box-Muller has the advantage of -// deterministically requiring exactly two uniform random variables as -// input and producing exactly two GRVs as output, which makes it -// especially well-suited to the counter-based generators in -// Random123. Other methods (e.g., Ziggurat, polar) require an -// indeterminate number of inputs for each output and so require a -// 'MicroURNG' to be used with Random123. The down side of Box-Muller -// is that it calls sincos, log and sqrt, which may be slow. However, -// on GPUs, these functions are remarkably fast, which makes -// Box-Muller the fastest GRV generator we know of on GPUs. -// -// This file exports two structs and one overloaded function, -// all in the r123 namespace: -// struct r123::float2{ float x,y; } -// struct r123::double2{ double x,y; } -// -// r123::float2 r123::boxmuller(uint32_t u0, uint32_t u1); -// r123::double2 r123::boxmuller(uint64_t u0, uint64_t u1); -// -// float2 and double2 are identical to their synonymous global- -// namespace structures in CUDA. -// -// This file may not be as portable, and has not been tested as -// rigorously as other files in the library, e.g., the generators. -// Nevertheless, we hope it is useful and we encourage developers to -// copy it and modify it for their own use. We invite comments and -// improvements. - -#ifndef _r123_BOXMULLER_HPP__ -#define _r123_BOXMULLER_HPP__ - -#include <Random123/features/compilerfeatures.h> -#include <Random123/uniform.hpp> -#include <math.h> - -namespace r123{ - -typedef ::float2 float2; -typedef ::double2 double2; - -#if !defined(R123_NO_SINCOS) && defined(__APPLE__) -/* MacOS X 10.10.5 (2015) doesn't have sincosf */ -#define R123_NO_SINCOS 1 -#endif - -#if R123_NO_SINCOS /* enable this if sincos and sincosf are not in the math library */ -R123_CUDA_DEVICE R123_STATIC_INLINE void sincosf(float x, float *s, float *c) { - *s = sinf(x); - *c = cosf(x); -} - -R123_CUDA_DEVICE R123_STATIC_INLINE void sincos(double x, double *s, double *c) { - *s = sin(x); - *c = cos(x); -} -#endif /* sincos is not in the math library */ - -#if !defined(CUDART_VERSION) || CUDART_VERSION < 5000 /* enabled if sincospi and sincospif are not in math lib */ - -R123_CUDA_DEVICE R123_STATIC_INLINE void sincospif(float x, float *s, float *c){ - const float PIf = 3.1415926535897932f; - sincosf(PIf*x, s, c); -} - -R123_CUDA_DEVICE R123_STATIC_INLINE void sincospi(double x, double *s, double *c) { - const double PI = 3.1415926535897932; - sincos(PI*x, s, c); -} -#endif /* sincospi is not in math lib */ - -/* - * take two 32bit unsigned random values and return a float2 with - * two random floats in a normal distribution via a Box-Muller transform - */ -R123_CUDA_DEVICE R123_STATIC_INLINE float2 boxmuller(uint32_t u0, uint32_t u1) { - float r; - float2 f; - sincospif(uneg11<float>(u0), &f.x, &f.y); - r = sqrtf(-2.f * logf(u01<float>(u1))); // u01 is guaranteed to avoid 0. - f.x *= r; - f.y *= r; - return f; -} - -/* - * take two 64bit unsigned random values and return a double2 with - * two random doubles in a normal distribution via a Box-Muller transform - */ -R123_CUDA_DEVICE R123_STATIC_INLINE double2 boxmuller(uint64_t u0, uint64_t u1) { - double r; - double2 f; - - sincospi(uneg11<double>(u0), &f.x, &f.y); - r = sqrt(-2. * log(u01<double>(u1))); // u01 is guaranteed to avoid 0. - f.x *= r; - f.y *= r; - return f; -} -} // namespace r123 - -#endif /* BOXMULLER_H__ */ diff --git a/ext/random123/include/Random123/conventional/Engine.hpp b/ext/random123/include/Random123/conventional/Engine.hpp deleted file mode 100644 index bd2da2e1..00000000 --- a/ext/random123/include/Random123/conventional/Engine.hpp +++ /dev/null @@ -1,276 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __Engine_dot_hpp_ -#define __Engine_dot_hpp_ - -#include "../features/compilerfeatures.h" -#include "../array.h" -#include <limits> -#include <stdexcept> -#include <sstream> -#include <algorithm> -#include <vector> -#if R123_USE_CXX11_TYPE_TRAITS -#include <type_traits> -#endif - -namespace r123{ -/** - If G satisfies the requirements of a CBRNG, and has a ctr_type whose - value_type is an unsigned integral type, then Engine<G> satisfies - the requirements of a C++11 "Uniform Random Number Engine" and can - be used in any context where such an object is expected. - - Note that wrapping a counter based RNG with a traditional API in - this way obscures much of the power of counter based PRNGs. - Nevertheless, it may be of value in applications that are already - coded to work with the C++11 random number engines. - - The MicroURNG template in MicroURNG.hpp - provides the more limited functionality of a C++11 "Uniform - Random Number Generator", but leaves the application in control - of counters and keys and hence may be preferable to the Engine template. - For example, a MicroURNG allows one to use C++11 "Random Number - Distributions" without giving up control over the counters - and keys. -*/ - -template<typename CBRNG> -struct Engine { - typedef CBRNG cbrng_type; - typedef typename CBRNG::ctr_type ctr_type; - typedef typename CBRNG::key_type key_type; - typedef typename CBRNG::ukey_type ukey_type; - typedef typename ctr_type::value_type result_type; - -protected: - cbrng_type b; - key_type key; - ctr_type c; - ctr_type v; - - void fix_invariant(){ - if( v.back() != 0 ) { - result_type vv = v.back(); - v = b(c, key); - v.back() = vv; - } - } -public: - explicit Engine() : b(), c() { - ukey_type x = {{}}; - v.back() = 0; - key = x; - } - explicit Engine(result_type r) : b(), c() { - ukey_type x = {{typename ukey_type::value_type(r)}}; - v.back() = 0; - key = x; - } - // 26.5.3 says that the SeedSeq templates shouldn't particpate in - // overload resolution unless the type qualifies as a SeedSeq. - // How that is determined is unspecified, except that "as a - // minimum a type shall not qualify as a SeedSeq if it is - // implicitly convertible to a result_type." - // - // First, we make sure that even the non-const copy constructor - // works as expected. In addition, if we've got C++11 - // type_traits, we use enable_if and is_convertible to implement - // the convertible-to-result_type restriction. Otherwise, the - // template is unconditional and will match in some surpirsing - // and undesirable situations. - Engine(Engine& e) : b(e.b), key(e.key), c(e.c){ - v.back() = e.v.back(); - fix_invariant(); - } - Engine(const Engine& e) : b(e.b), key(e.key), c(e.c){ - v.back() = e.v.back(); - fix_invariant(); - } - - template <typename SeedSeq> - explicit Engine(SeedSeq &s -#if R123_USE_CXX11_TYPE_TRAITS - , typename std::enable_if<!std::is_convertible<SeedSeq, result_type>::value>::type* =0 -#endif - ) - : b(), c() { - ukey_type ukey = ukey_type::seed(s); - key = ukey; - v.back() = 0; - } - void seed(result_type r){ - *this = Engine(r); - } - template <typename SeedSeq> - void seed(SeedSeq &s -#if R123_USE_CXX11_TYPE_TRAITS - , typename std::enable_if<!std::is_convertible<SeedSeq, result_type>::value>::type* =0 -#endif - ){ - *this = Engine(s); - } - void seed(){ - *this = Engine(); - } - friend bool operator==(const Engine& lhs, const Engine& rhs){ - return lhs.c==rhs.c && lhs.v.back() == rhs.v.back() && lhs.key == rhs.key; - } - friend bool operator!=(const Engine& lhs, const Engine& rhs){ - return lhs.c!=rhs.c || lhs.v.back()!=rhs.v.back() || lhs.key!=rhs.key; - } - - friend std::ostream& operator<<(std::ostream& os, const Engine& be){ - return os << be.c << " " << be.key << " " << be.v.back(); - } - - friend std::istream& operator>>(std::istream& is, Engine& be){ - is >> be.c >> be.key >> be.v.back(); - be.fix_invariant(); - return is; - } - - // The <random> shipped with MacOS Xcode 4.5.2 imposes a - // non-standard requirement that URNGs also have static data - // members: _Min and _Max. Later versions of libc++ impose the - // requirement only when constexpr isn't supported. Although the - // Xcode 4.5.2 requirement is clearly non-standard, it is unlikely - // to be fixed and it is very easy work around. We certainly - // don't want to go to great lengths to accommodate every buggy - // library we come across, but in this particular case, the effort - // is low and the benefit is high, so it's worth doing. Thanks to - // Yan Zhou for pointing this out to us. See similar code in - // ../MicroURNG.hpp - const static result_type _Min = 0; - const static result_type _Max = ~((result_type)0); - - static R123_CONSTEXPR result_type min R123_NO_MACRO_SUBST () { return _Min; } - static R123_CONSTEXPR result_type max R123_NO_MACRO_SUBST () { return _Max; } - - result_type operator()(){ - if( c.size() == 1 ) // short-circuit the scalar case. Compilers aren't mind-readers. - return b(c.incr(), key)[0]; - result_type& elem = v.back(); - if( elem == 0 ){ - v = b(c.incr(), key); - result_type ret = v.back(); - elem = c.size()-1; - return ret; - } - return v[--elem]; - } - - void discard(R123_ULONG_LONG skip){ - // don't forget: elem counts down - size_t nelem = c.size(); - size_t sub = skip % nelem; - result_type& elem = v.back(); - skip /= nelem; - if (elem < sub) { - elem += nelem; - skip++; - } - elem -= sub; - c.incr(skip); - fix_invariant(); - } - - //-------------------------- - // Some bonus methods, not required for a Random Number - // Engine - - // Constructors and seed() method for ukey_type seem useful - // We need const and non-const to supersede the SeedSeq template. - explicit Engine(const ukey_type &uk) : key(uk), c(){ v.back() = 0; } - explicit Engine(ukey_type &uk) : key(uk), c(){ v.back() = 0; } - void seed(const ukey_type& uk){ - *this = Engine(uk); - } - void seed(ukey_type& uk){ - *this = Engine(uk); - } - -#if R123_USE_CXX11_TYPE_TRAITS - template <typename DUMMY=void> - explicit Engine(const key_type& k, - typename std::enable_if<!std::is_same<ukey_type, key_type>::value, DUMMY>::type* = 0) - : key(k), c(){ v.back() = 0; } - - template <typename DUMMY=void> - void seed(const key_type& k, - typename std::enable_if<!std::is_same<ukey_type, key_type>::value, DUMMY>::type* = 0){ - *this = Engine(k); - } -#endif - - // Forward the e(counter) to the CBRNG we are templated - // on, using the current value of the key. - ctr_type operator()(const ctr_type& c) const{ - return b(c, key); - } - - key_type getkey() const{ - return key; - } - - // N.B. setkey(k) is different from seed(k) because seed(k) zeros - // the counter (per the C++11 requirements for an Engine), whereas - // setkey does not. - void setkey(const key_type& k){ - key = k; - fix_invariant(); - } - - // Maybe the caller want's to know the details of - // the internal state, e.g., so it can call a different - // bijection with the same counter. - std::pair<ctr_type, result_type> getcounter() const { - return std::make_pair(c, v.back()); - } - - // And the inverse. - void setcounter(const ctr_type& _c, result_type _elem){ - static const size_t nelem = c.size(); - if( _elem >= nelem ) - throw std::range_error("Engine::setcounter called with elem out of range"); - c = _c; - v.back() = _elem; - fix_invariant(); - } - - void setcounter(const std::pair<ctr_type, result_type>& ce){ - setcounter(ce.first, ce.second); - } -}; -} // namespace r123 - -#endif diff --git a/ext/random123/include/Random123/conventional/gsl_cbrng.h b/ext/random123/include/Random123/conventional/gsl_cbrng.h deleted file mode 100644 index 44457d00..00000000 --- a/ext/random123/include/Random123/conventional/gsl_cbrng.h +++ /dev/null @@ -1,128 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __r123_compat_gslrng_dot_h__ -#define __r123_compat_gslrng_dot_h__ - -#include <gsl/gsl_rng.h> -#include <string.h> - -/** - The macro: GSL_CBRNG(NAME, CBRNGNAME) - declares the necessary structs and constants that define a - gsl_rng_NAME type based on the counter-based RNG CBRNGNAME. For example: - - Usage: - - @code - #include <Random123/threefry.h> - #include <Random123/conventional/gsl_cbrng.h> // this file - GSL_CBRNG(cbrng, threefry4x32); // creates gsl_rng_cbrng - - int main(int argc, char **argv){ - gsl_rng *r = gsl_rng_alloc(gsl_rng_cbrng); - ... use r as you would use any other gsl_rng ... - } - @endcode - - It requires that NAME be the name of a CBRNG that follows the - naming and stylistic conventions of the Random123 library. - - Note that wrapping a \ref CBRNG "counter-based PRNG" with a traditional API in - this way obscures much of the power of the CBRNG API. - Nevertheless, it may be of value to applications that are already - coded to work with GSL random number generators, and that wish - to use the RNGs in the Random123 library. - - */ - -#define GSL_CBRNG(NAME, CBRNGNAME) \ -const gsl_rng_type *gsl_rng_##NAME; \ - \ -typedef struct{ \ - CBRNGNAME##_ctr_t ctr; \ - CBRNGNAME##_ctr_t r; \ - CBRNGNAME##_key_t key; \ - int elem; \ -} NAME##_state; \ - \ -static unsigned long int NAME##_get(void *vstate){ \ - NAME##_state *st = (NAME##_state *)vstate; \ - const int N=sizeof(st->ctr.v)/sizeof(st->ctr.v[0]); \ - if( st->elem == 0 ){ \ - ++st->ctr.v[0]; \ - if( N>1 && st->ctr.v[0] == 0 ) ++st->ctr.v[1]; \ - if( N>2 && st->ctr.v[1] == 0 ) ++st->ctr.v[2]; \ - if( N>3 && st->ctr.v[2] == 0 ) ++st->ctr.v[3]; \ - st->r = CBRNGNAME(st->ctr, st->key); \ - st->elem = N; \ - } \ - return 0xffffffffUL & st->r.v[--st->elem]; \ -} \ - \ -static double \ -NAME##_get_double (void * vstate) \ -{ \ - return NAME##_get (vstate)/4294967296.0; \ -} \ - \ -static void NAME##_set(void *vstate, unsigned long int s){ \ - NAME##_state *st = (NAME##_state *)vstate; \ - st->elem = 0; \ - /* Assume that key and ctr have an array member, v, \ - as if they are r123arrayNxW. If not, this will fail \ - to compile. In particular, this macro fails to compile \ - when the underlying CBRNG requires use of keyinit */ \ - memset(&st->ctr.v[0], 0, sizeof(st->ctr.v)); \ - memset(&st->key.v[0], 0, sizeof(st->key.v)); \ - /* GSL 1.15 documentation says this about gsl_rng_set: \ - Note that the most generators only accept 32-bit seeds, with higher \ - values being reduced modulo 2^32. For generators with smaller \ - ranges the maximum seed value will typically be lower. \ - so we won't jump through any hoops here to deal with \ - high bits if sizeof(unsigned long) > sizeof(uint32_t). */ \ - st->key.v[0] = s; \ -} \ - \ -static const gsl_rng_type NAME##_type = { \ - #NAME, \ - 0xffffffffUL, \ - 0, \ - sizeof(NAME##_state), \ - &NAME##_set, \ - &NAME##_get, \ - &NAME##_get_double \ -}; \ - \ -const gsl_rng_type *gsl_rng_##NAME = &NAME##_type - -#endif - diff --git a/ext/random123/include/Random123/features/clangfeatures.h b/ext/random123/include/Random123/features/clangfeatures.h deleted file mode 100644 index 1e3c8cfd..00000000 --- a/ext/random123/include/Random123/features/clangfeatures.h +++ /dev/null @@ -1,93 +0,0 @@ -/* -Copyright 2010-2016, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __clangfeatures_dot_hpp -#define __clangfeatures_dot_hpp - -#ifndef R123_USE_X86INTRIN_H -#if (defined(__x86_64__)||defined(__i386__)) -#define R123_USE_X86INTRIN_H 1 -#else -#define R123_USE_X86INTRIN_H 0 -#endif -#endif - -#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS -#define R123_USE_CXX11_UNRESTRICTED_UNIONS __has_feature(cxx_unrestricted_unions) -#endif - -#ifndef R123_USE_CXX11_STATIC_ASSERT -#define R123_USE_CXX11_STATIC_ASSERT __has_feature(cxx_static_assert) -#endif - -// With clang-3.6, -Wall warns about unused-local-typedefs. -// The "obvious" thing to do is to ignore -Wunused-local-typedefs, -// but that doesn't work because earlier versions of clang blow -// up on an 'unknown warning group'. So we briefly ignore -Wall... -// It's tempting to just give up on static assertions in pre-c++11 code. -#if !R123_USE_CXX11_STATIC_ASSERT && !defined(R123_STATIC_ASSERT) -#define R123_STATIC_ASSERT(expr, msg) \ -_Pragma("clang diagnostic push") \ -_Pragma("clang diagnostic ignored \"-Wall\"") \ -typedef char static_assertion[(!!(expr))*2-1] \ -_Pragma("clang diagnostic pop") -#endif - -#ifndef R123_USE_CXX11_CONSTEXPR -#define R123_USE_CXX11_CONSTEXPR __has_feature(cxx_constexpr) -#endif - -#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS -#define R123_USE_CXX11_EXPLICIT_CONVERSIONS __has_feature(cxx_explicit_conversions) -#endif - -// With clang-3.0, the apparently simpler: -// #define R123_USE_CXX11_RANDOM __has_include(<random>) -// dumps core. -#ifndef R123_USE_CXX11_RANDOM -#if __cplusplus>=201103L && __has_include(<random>) -#define R123_USE_CXX11_RANDOM 1 -#else -#define R123_USE_CXX11_RANDOM 0 -#endif -#endif - -#ifndef R123_USE_CXX11_TYPE_TRAITS -#if __cplusplus>=201103L && __has_include(<type_traits>) -#define R123_USE_CXX11_TYPE_TRAITS 1 -#else -#define R123_USE_CXX11_TYPE_TRAITS 0 -#endif -#endif - -#include "gccfeatures.h" - -#endif diff --git a/ext/random123/include/Random123/features/compilerfeatures.h b/ext/random123/include/Random123/features/compilerfeatures.h deleted file mode 100644 index d2fe1e7c..00000000 --- a/ext/random123/include/Random123/features/compilerfeatures.h +++ /dev/null @@ -1,341 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -/** - -@page porting Preprocessor symbols for porting Random123 to different platforms. - -The Random123 library is portable across C, C++, CUDA, OpenCL environments, -and multiple operating systems (Linux, Windows 7, Mac OS X, FreeBSD, Solaris). -This level of portability requires the abstraction of some features -and idioms that are either not standardized (e.g., asm statments), or for which -different vendors have their own standards (e.g., SSE intrinsics) or for -which vendors simply refuse to conform to well-established standards (e.g., <inttypes.h>). - -Random123/features/compilerfeatures.h -conditionally includes a compiler-or-OS-specific Random123/featires/XXXfeatures.h file which -defines appropriate values for the preprocessor symbols which can be used with -a specific compiler or OS. Those symbols will then -be used by other header files and source files in the Random123 -library (and may be used by applications) to control what actually -gets presented to the compiler. - -Most of the symbols are boolean valued. In general, they will -\b always be defined with value either 1 or 0, so do -\b NOT use \#ifdef. Use \#if R123_USE_SOMETHING instead. - -Library users can override any value by defining the pp-symbol with a compiler option, -e.g., - - cc -DR123_USE_MULHILO64_C99 - -will use a strictly c99 version of the full-width 64x64->128-bit multiplication -function, even if it would be disabled by default. - -All boolean-valued pre-processor symbols in Random123/features/compilerfeatures.h start with the prefix R123_USE_ -@verbatim - AES_NI - AES_OPENSSL - SSE4_2 - SSE4_1 - SSE - - STD_RANDOM - - GNU_UINT128 - ASM_GNU - ASM_MSASM - - CPUID_MSVC - - CXX11_RANDOM - CXX11_TYPE_TRAITS - CXX11_STATIC_ASSERT - CXX11_CONSTEXPR - CXX11_UNRESTRICTED_UNIONS - CXX11_EXPLICIT_CONVERSIONS - CXX11_LONG_LONG - CXX11_STD_ARRAY - CXX11 - - X86INTRIN_H - IA32INTRIN_H - XMMINTRIN_H - EMMINTRIN_H - SMMINTRIN_H - WMMINTRIN_H - INTRIN_H - - MULHILO32_ASM - MULHILO64_ASM - MULHILO64_MSVC_INTRIN - MULHILO64_CUDA_INTRIN - MULHILO64_OPENCL_INTRIN - MULHILO64_C99 - - U01_DOUBLE - -@endverbatim -Most have obvious meanings. Some non-obvious ones: - -AES_NI and AES_OPENSSL are not mutually exclusive. You can have one, -both or neither. - -GNU_UINT128 says that it's safe to use __uint128_t, but it -does not require its use. In particular, it should be -used in mulhilo<uint64_t> only if MULHILO64_ASM is unset. - -If the XXXINTRIN_H macros are true, then one should -@code -#include <xxxintrin.h> -@endcode -to gain accesss to compiler intrinsics. - -The CXX11_SOME_FEATURE macros allow the code to use specific -features of the C++11 language and library. The catchall -In the absence of a specific CXX11_SOME_FEATURE, the feature -is controlled by the catch-all R123_USE_CXX11 macro. - -U01_DOUBLE defaults on, and can be turned off (set to 0) -if one does not want the utility functions that convert to double -(i.e. u01_*_53()), e.g. on OpenCL without the cl_khr_fp64 extension. - -There are a number of invariants that are always true. Application code may -choose to rely on these: - -<ul> -<li>ASM_GNU and ASM_MASM are mutually exclusive -<li>The "higher" SSE values imply the lower ones. -</ul> - -There are also non-boolean valued symbols: - -<ul> -<li>R123_STATIC_INLINE - - According to both C99 and GNU99, the 'static inline' declaration allows - the compiler to not emit code if the function is not used. - Note that the semantics of 'inline', 'static' and 'extern' in - gcc have changed over time and are subject to modification by - command line options, e.g., -std=gnu89, -fgnu-inline. - Nevertheless, it appears that the meaning of 'static inline' - has not changed over time and (with a little luck) the use of 'static inline' - here will be portable between versions of gcc and to other C99 - compilers. - See: http://gcc.gnu.org/onlinedocs/gcc/Inline.html - http://www.greenend.org.uk/rjk/2003/03/inline.html - -<li>R123_FORCE_INLINE(decl) - - which expands to 'decl', adorned with the compiler-specific - embellishments to strongly encourage that the declared function be - inlined. If there is no such compiler-specific magic, it should - expand to decl, unadorned. - -<li>R123_CUDA_DEVICE - which expands to __device__ (or something else with - sufficiently similar semantics) when CUDA is in use, and expands - to nothing in other cases. - -<li>R123_METAL_THREAD_ADDRESS_SPACE - which expands to 'thread' (or - something else with sufficiently similar semantics) when compiling a - Metal kernel, and expands to nothing in other cases. - -<li>R123_ASSERT(x) - which expands to assert(x), or maybe to nothing at - all if we're in an environment so feature-poor that you can't even - call assert (I'm looking at you, CUDA and OpenCL), or even include - assert.h safely (OpenCL). - -<li>R123_STATIC_ASSERT(expr,msg) - which expands to - static_assert(expr,msg), or to an expression that - will raise a compile-time exception if expr is not true. - -<li>R123_ULONG_LONG - which expands to a declaration of the longest available - unsigned integer. - -<li>R123_64BIT(x) - expands to something equivalent to - UINT64_C(x) from <stdint.h>, even in environments where <stdint.h> - is not available, e.g., MSVC and OpenCL. - -<li>R123_BUILTIN_EXPECT(expr,likely_value) - expands to something with - the semantics of gcc's __builtin_expect(expr,likely_value). If - the environment has nothing like __builtin_expect, it should expand - to just expr. -</ul> - - -\cond HIDDEN_FROM_DOXYGEN -*/ - -/* -N.B. When something is added to the list of features, it should be -added to each of the *features.h files, AND to examples/ut_features.cpp. -*/ - -/* N.B. most other compilers (icc, nvcc, open64, llvm) will also define __GNUC__, so order matters. */ -#if defined(__METAL_MACOS__) -#include "metalfeatures.h" -#elif defined(__OPENCL_VERSION__) && __OPENCL_VERSION__ > 0 -#include "openclfeatures.h" -#elif defined(__ICC) -#include "iccfeatures.h" -#elif defined(__xlC__) -#include "xlcfeatures.h" -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) -#include "sunprofeatures.h" -#elif defined(__OPEN64__) -#include "open64features.h" -#elif defined(__clang__) -#include "clangfeatures.h" -#elif defined(__GNUC__) -#include "gccfeatures.h" -#elif defined(__PGI) -#include "pgccfeatures.h" -#elif defined(_MSC_FULL_VER) -#include "msvcfeatures.h" -#else -#error "Can't identify compiler. You'll need to add a new xxfeatures.hpp" -{ /* maybe an unbalanced brace will terminate the compilation */ -#endif - -#ifndef R123_USE_CXX11 -#define R123_USE_CXX11 (__cplusplus >= 201103L) -#endif - -#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS -#define R123_USE_CXX11_UNRESTRICTED_UNIONS R123_USE_CXX11 -#endif - -#ifndef R123_USE_CXX11_STATIC_ASSERT -#define R123_USE_CXX11_STATIC_ASSERT R123_USE_CXX11 -#endif - -#ifndef R123_USE_CXX11_CONSTEXPR -#define R123_USE_CXX11_CONSTEXPR R123_USE_CXX11 -#endif - -#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS -#define R123_USE_CXX11_EXPLICIT_CONVERSIONS R123_USE_CXX11 -#endif - -#ifndef R123_USE_CXX11_RANDOM -#define R123_USE_CXX11_RANDOM R123_USE_CXX11 -#endif - -#ifndef R123_USE_CXX11_TYPE_TRAITS -#define R123_USE_CXX11_TYPE_TRAITS R123_USE_CXX11 -#endif - -#ifndef R123_USE_CXX11_LONG_LONG -#define R123_USE_CXX11_LONG_LONG R123_USE_CXX11 -#endif - -#ifndef R123_USE_CXX11_STD_ARRAY -#define R123_USE_CXX11_STD_ARRAY R123_USE_CXX11 -#endif - -#ifndef R123_USE_MULHILO64_C99 -#define R123_USE_MULHILO64_C99 0 -#endif - -#ifndef R123_USE_MULHILO64_MULHI_INTRIN -#define R123_USE_MULHILO64_MULHI_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO32_MULHI_INTRIN -#define R123_USE_MULHILO32_MULHI_INTRIN 0 -#endif - -#ifndef R123_STATIC_ASSERT -#if R123_USE_CXX11_STATIC_ASSERT -#define R123_STATIC_ASSERT(expr, msg) static_assert(expr, msg) -#else - /* if msg always_looked_like_this, we could paste it into the name. Worth it? */ -#define R123_STATIC_ASSERT(expr, msg) typedef char static_assertion[(!!(expr))*2-1] -#endif -#endif - -#ifndef R123_CONSTEXPR -#if R123_USE_CXX11_CONSTEXPR -#define R123_CONSTEXPR constexpr -#else -#define R123_CONSTEXPR -#endif -#endif - -#ifndef R123_USE_64BIT -#define R123_USE_64BIT 1 -#endif - -#ifndef R123_USE_PHILOX_64BIT -#define R123_USE_PHILOX_64BIT (R123_USE_64BIT && (R123_USE_MULHILO64_ASM || R123_USE_MULHILO64_MSVC_INTRIN || R123_USE_MULHILO64_CUDA_INTRIN || R123_USE_GNU_UINT128 || R123_USE_MULHILO64_C99 || R123_USE_MULHILO64_OPENCL_INTRIN || R123_USE_MULHILO64_MULHI_INTRIN)) -#endif - -#ifndef R123_ULONG_LONG -#if defined(__cplusplus) && !R123_USE_CXX11_LONG_LONG -/* C++98 doesn't have long long. It doesn't have uint64_t either, but - we will have typedef'ed uint64_t to something in the xxxfeatures.h. - With luck, it won't elicit complaints from -pedantic. Cross your - fingers... */ -#define R123_ULONG_LONG uint64_t -#else -#define R123_ULONG_LONG unsigned long long -#endif -#endif - -/* UINT64_C should have been #defined by XXXfeatures.h, either by - #include <stdint.h> or through compiler-dependent hacks */ -#ifndef R123_64BIT -#define R123_64BIT(x) UINT64_C(x) -#endif - -#ifndef R123_THROW -#define R123_THROW(x) throw (x) -#endif - -#ifndef R123_METAL_THREAD_ADDRESS_SPACE -#define R123_METAL_THREAD_ADDRESS_SPACE -#endif - -#ifndef R123_METAL_CONSTANT_ADDRESS_SPACE -#define R123_METAL_CONSTANT_ADDRESS_SPACE -#endif - -/* - * Windows.h (and perhaps other "well-meaning" code define min and - * max, so there's a high chance that our definition of min, max - * methods or use of std::numeric_limits min and max will cause - * complaints in any program that happened to include Windows.h or - * suchlike first. We use the null macro below in our own header - * files definition or use of min, max to defensively preclude - * this problem. It may not be enough; one might need to #define - * NOMINMAX before including Windows.h or compile with -DNOMINMAX. - */ -#define R123_NO_MACRO_SUBST - -/** \endcond */ diff --git a/ext/random123/include/Random123/features/gccfeatures.h b/ext/random123/include/Random123/features/gccfeatures.h deleted file mode 100644 index 701f3c66..00000000 --- a/ext/random123/include/Random123/features/gccfeatures.h +++ /dev/null @@ -1,263 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __gccfeatures_dot_hpp -#define __gccfeatures_dot_hpp - -#define R123_GNUC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) - -#if !defined(__x86_64__) && !defined(__i386__) && !defined(__powerpc__) && !defined(__arm__) && !defined(__aarch64__) -# error "This code has only been tested on x86, powerpc and a few arm platforms." -#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task> -{ /* maybe an unbalanced brace will terminate the compilation */ - /* Feel free to try the Random123 library on other architectures by changing - the conditions that reach this error, but you should consider it a - porting exercise and expect to encounter bugs and deficiencies. - Please let the authors know of any successes (or failures). */ -#endif - -#ifdef __powerpc__ -#include <ppu_intrinsics.h> -#endif - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static __inline__ -#endif - -#ifndef R123_FORCE_INLINE -#if R123_GNUC_VERSION >= 40000 -#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline)) -#else -#define R123_FORCE_INLINE(decl) decl -#endif -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely) -#endif - -/* According to the C++0x standard, we should be able to test the numeric - value of __cplusplus == 199701L for C++98, __cplusplus == 201103L for C++11 - But gcc has had an open bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=1773 - since early 2001, which was finally fixed in 4.7 (early 2012). For - earlier versions, the only way to detect whether --std=c++0x was requested - on the command line is to look at the __GCC_EXPERIMENTAL_CXX0X__ pp-symbol. -*/ -#if defined(__GCC_EXPERIMENTAL_CXX0X__) -#define GNU_CXX11 (__cplusplus>=201103L || (R123_GNUC_VERSION<40700 && 1/* defined(__GCC_EXPERIMENTAL_CXX0X__) */)) -#else -#define GNU_CXX11 (__cplusplus>=201103L || (R123_GNUC_VERSION<40700 && 0/* defined(__GCC_EXPERIMENTAL_CXX0X__) */)) -#endif - -#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS -#define R123_USE_CXX11_UNRESTRICTED_UNIONS ((R123_GNUC_VERSION >= 40600) && GNU_CXX11) -#endif - -#ifndef R123_USE_CXX11_STATIC_ASSERT -#define R123_USE_CXX11_STATIC_ASSERT ((R123_GNUC_VERSION >= 40300) && GNU_CXX11) -#endif - -#ifndef R123_USE_CXX11_CONSTEXPR -#define R123_USE_CXX11_CONSTEXPR ((R123_GNUC_VERSION >= 40600) && GNU_CXX11) -#endif - -#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS -#define R123_USE_CXX11_EXPLICIT_CONVERSIONS ((R123_GNUC_VERSION >= 40500) && GNU_CXX11) -#endif - -#ifndef R123_USE_CXX11_RANDOM -#define R123_USE_CXX11_RANDOM ((R123_GNUC_VERSION>=40500) && GNU_CXX11) -#endif - -#ifndef R123_USE_CXX11_TYPE_TRAITS -#define R123_USE_CXX11_TYPE_TRAITS ((R123_GNUC_VERSION>=40400) && GNU_CXX11) -#endif - -#ifndef R123_USE_AES_NI -#ifdef __AES__ -#define R123_USE_AES_NI 1 -#else -#define R123_USE_AES_NI 0 -#endif -#endif - -#ifndef R123_USE_SSE4_2 -#ifdef __SSE4_2__ -#define R123_USE_SSE4_2 1 -#else -#define R123_USE_SSE4_2 0 -#endif -#endif - -#ifndef R123_USE_SSE4_1 -#ifdef __SSE4_1__ -#define R123_USE_SSE4_1 1 -#else -#define R123_USE_SSE4_1 0 -#endif -#endif - -#ifndef R123_USE_SSE -/* There's no point in trying to compile SSE code in Random123 - unless SSE2 is available. */ -#ifdef __SSE2__ -#define R123_USE_SSE 1 -#else -#define R123_USE_SSE 0 -#endif -#endif - -#ifndef R123_USE_AES_OPENSSL -/* There isn't really a good way to tell at compile time whether - openssl is available. Without a pre-compilation configure-like - tool, it's less error-prone to guess that it isn't available. Add - -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to - play with openssl */ -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#if defined(__x86_64__) || defined(__aarch64__) -#define R123_USE_GNU_UINT128 1 -#else -#define R123_USE_GNU_UINT128 0 -#endif -#endif - -#ifndef R123_USE_ASM_GNU -#if (defined(__x86_64__)||defined(__i386__)) -#define R123_USE_ASM_GNU 1 -#else -#define R123_USE_ASM_GNU 1 -#endif -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 0 -#endif - -#ifndef R123_USE_X86INTRIN_H -#if (defined(__x86_64__)||defined(__i386__)) -#define R123_USE_X86INTRIN_H (1/* (defined(__x86_64__)||defined(__i386__)) */ && R123_GNUC_VERSION >= 40402) -#else -#define R123_USE_X86INTRIN_H (0/* (defined(__x86_64__)||defined(__i386__)) */ && R123_GNUC_VERSION >= 40402) -#endif -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 0 -#endif - -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 0 -#endif - -#ifndef R123_USE_EMMINTRIN_H -/* gcc -m64 on Solaris 10 defines __SSE2__ but doesn't have - emmintrin.h in the include search path. This is - so broken that I refuse to try to work around it. If this - affects you, figure out where your emmintrin.h lives and - add an appropriate -I to your CPPFLAGS. Or add -DR123_USE_SSE=0. */ -#define R123_USE_EMMINTRIN_H (R123_USE_SSE && (R123_GNUC_VERSION < 40402)) -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H ((R123_USE_SSE4_1 || R123_USE_SSE4_2) && (R123_GNUC_VERSION < 40402)) -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 0 -#endif - -#ifndef R123_USE_INTRIN_H -#define R123_USE_INTRIN_H 0 -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_MULHI_INTRIN -#if (defined(__powerpc64__)) -#define R123_USE_MULHILO64_MULHI_INTRIN 1 -#else -#define R123_USE_MULHILO64_MULHI_INTRIN 0 -#endif -#endif - -#ifndef R123_MULHILO64_MULHI_INTRIN -#define R123_MULHILO64_MULHI_INTRIN __mulhdu -#endif - -#ifndef R123_USE_MULHILO32_MULHI_INTRIN -#define R123_USE_MULHILO32_MULHI_INTRIN 0 -#endif - -#ifndef R123_MULHILO32_MULHI_INTRIN -#define R123_MULHILO32_MULHI_INTRIN __mulhwu -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -/* If you add something, it must go in all the other XXfeatures.hpp - and in ../ut_features.cpp */ -#endif diff --git a/ext/random123/include/Random123/features/iccfeatures.h b/ext/random123/include/Random123/features/iccfeatures.h deleted file mode 100644 index 7e72dec1..00000000 --- a/ext/random123/include/Random123/features/iccfeatures.h +++ /dev/null @@ -1,212 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __icpcfeatures_dot_hpp -#define __icpcfeatures_dot_hpp - -// icc relies on gcc libraries and other toolchain components. -#define R123_GNUC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) - -#if !defined(__x86_64__) && !defined(__i386__) -# error "This code has only been tested on x86 platforms." -{ // maybe an unbalanced brace will terminate the compilation -// You are invited to try Easy123 on other architectures, by changing -// the conditions that reach this error, but you should consider it a -// porting exercise and expect to encounter bugs and deficiencies. -// Please let the authors know of any successes (or failures). -#endif - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline)) -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely) -#endif - -// The basic idiom is: -// #ifndef R123_SOMETHING -// #if some condition -// #define R123_SOMETHING 1 -// #else -// #define R123_SOMETHING 0 -// #endif -// #endif -// This idiom allows an external user to override any decision -// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHINE=0 - -// An alternative idiom is: -// #ifndef R123_SOMETHING -// #define R123_SOMETHING (some boolean expression) -// #endif -// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE -// pp-symbols. - -#ifndef R123_USE_SSE4_2 -#ifdef __SSE4_2__ -#define R123_USE_SSE4_2 1 -#else -#define R123_USE_SSE4_2 0 -#endif -#endif - -#ifndef R123_USE_SSE4_1 -#ifdef __SSE4_1__ -#define R123_USE_SSE4_1 1 -#else -#define R123_USE_SSE4_1 0 -#endif -#endif - -#ifndef R123_USE_SSE -#ifdef __SSE2__ -#define R123_USE_SSE 1 -#else -#define R123_USE_SSE 0 -#endif -#endif - -#ifndef R123_USE_AES_NI -// Unlike gcc, icc (version 12) does not pre-define an __AES__ -// pp-symbol when -maes or -xHost is on the command line. This feels -// like a defect in icc (it defines __SSE4_2__ in analogous -// circumstances), but until Intel fixes it, we're better off erring -// on the side of caution and not generating instructions that are -// going to raise SIGILL when executed. To get the AES-NI -// instructions with icc, the caller must puts something like -// -DR123_USE_AES_NI=1 or -D__AES__ on the command line. FWIW, the -// AES-NI Whitepaper by Gueron says that icc has supported AES-NI from -// 11.1 onwards. -// -#if defined(__AES__) -#define R123_USE_AES_NI ((__ICC>=1101) && 1/*defined(__AES__)*/) -#else -#define R123_USE_AES_NI ((__ICC>=1101) && 0/*defined(__AES__)*/) -#endif -#endif - -#ifndef R123_USE_AES_OPENSSL -/* There isn't really a good way to tell at compile time whether - openssl is available. Without a pre-compilation configure-like - tool, it's less error-prone to guess that it isn't available. Add - -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to - play with openssl */ -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU 1 -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 0 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H 0 -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 1 -#endif - -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 0 -#endif - -#ifndef R123_USE_EMMINTRIN_H -#define R123_USE_EMMINTRIN_H 1 -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H 1 -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 1 -#endif - -#ifndef R123_USE_INTRIN_H -#define R123_USE_INTRIN_H 0 -#endif - -#ifndef R123_USE_MULHILO16_ASM -#define R123_USE_MULHILO16_ASM 0 -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 1 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -// If you add something, it must go in all the other XXfeatures.hpp -// and in ../ut_features.cpp -#endif diff --git a/ext/random123/include/Random123/features/metalfeatures.h b/ext/random123/include/Random123/features/metalfeatures.h deleted file mode 100644 index bafe51a6..00000000 --- a/ext/random123/include/Random123/features/metalfeatures.h +++ /dev/null @@ -1,111 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * Written by Tom Schoonjans <Tom.Schoonjans@me.com> - */ - -#ifndef __metalfeatures_dot_hpp -#define __metalfeatures_dot_hpp - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline)) -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_METAL_THREAD_ADDRESS_SPACE -#define R123_METAL_THREAD_ADDRESS_SPACE thread -#endif - -#ifndef R123_METAL_CONSTANT_ADDRESS_SPACE -#define R123_METAL_CONSTANT_ADDRESS_SPACE constant -#endif - -#ifndef R123_ASSERT -#define R123_ASSERT(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) expr -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO32_MULHI_INTRIN -#define R123_USE_MULHILO32_MULHI_INTRIN 1 -#endif - -#if R123_USE_MULHILO32_MULHI_INTRIN -#include <metal_integer> -#define R123_MULHILO32_MULHI_INTRIN metal::mulhi -#endif - -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -#ifndef R123_USE_64BIT -#define R123_USE_64BIT 0 /* Metal currently (Feb 2019, Specification-2) does not support 64-bit variable types */ -#endif - -#ifndef R123_ULONG_LONG -/* the longest integer type in Metal (Feb 2019, Specification-2) is a - * 32-bit unsigned int. Let's hope for the best... */ -#define R123_ULONG_LONG unsigned int -#endif - -#endif diff --git a/ext/random123/include/Random123/features/msvcfeatures.h b/ext/random123/include/Random123/features/msvcfeatures.h deleted file mode 100644 index 9eb95209..00000000 --- a/ext/random123/include/Random123/features/msvcfeatures.h +++ /dev/null @@ -1,200 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __msvcfeatures_dot_hpp -#define __msvcfeatures_dot_hpp - -//#if _MSVC_FULL_VER <= 15 -//#error "We've only tested MSVC_FULL_VER==15." -//#endif - -#if !defined(_M_IX86) && !defined(_M_X64) -# error "This code has only been tested on x86 platforms." -{ // maybe an unbalanced brace will terminate the compilation -// You are invited to try Random123 on other architectures, by changing -// the conditions that reach this error, but you should consider it a -// porting exercise and expect to encounter bugs and deficiencies. -// Please let the authors know of any successes (or failures). -#endif - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static __inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) _forceinline decl -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) expr -#endif - -// The basic idiom is: -// #ifndef R123_SOMETHING -// #if some condition -// #define R123_SOMETHING 1 -// #else -// #define R123_SOMETHING 0 -// #endif -// #endif -// This idiom allows an external user to override any decision -// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHINE=0 - -// An alternative idiom is: -// #ifndef R123_SOMETHING -// #define R123_SOMETHING (some boolean expression) -// #endif -// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE -// pp-symbols. - -#ifndef R123_USE_AES_NI -#if defined(_M_X64) -#define R123_USE_AES_NI 1 -#else -#define R123_USE_AES_NI 0 -#endif -#endif - -#ifndef R123_USE_SSE4_2 -#if defined(_M_X64) -#define R123_USE_SSE4_2 1 -#else -#define R123_USE_SSE4_2 0 -#endif -#endif - -#ifndef R123_USE_SSE4_1 -#if defined(_M_X64) -#define R123_USE_SSE4_1 1 -#else -#define R123_USE_SSE4_1 0 -#endif -#endif - -#ifndef R123_USE_SSE -#define R123_USE_SSE 1 -#endif - -#ifndef R123_USE_AES_OPENSSL -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU 0 -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 1 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H 0 -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 0 -#endif - -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 0 -#endif - -#ifndef R123_USE_EMMINTRIN_H -#define R123_USE_EMMINTRIN_H 1 -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H 1 -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 1 -#endif - -#ifndef R123_USE_INTRIN_H -#define R123_USE_INTRIN_H 1 -#endif - -#ifndef R123_USE_MULHILO16_ASM -#define R123_USE_MULHILO16_ASM 0 -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#if defined(_M_X64) -#define R123_USE_MULHILO64_MSVC_INTRIN 1 -#else -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -#pragma warning(disable:4244) -#pragma warning(disable:4996) - -// If you add something, it must go in all the other XXfeatures.hpp -// and in ../ut_features.cpp -#endif diff --git a/ext/random123/include/Random123/features/nvccfeatures.h b/ext/random123/include/Random123/features/nvccfeatures.h deleted file mode 100644 index d1ff8bf5..00000000 --- a/ext/random123/include/Random123/features/nvccfeatures.h +++ /dev/null @@ -1,125 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __r123_nvcc_features_dot_h__ -#define __r123_nvcc_features_dot_h__ - -#if !defined(CUDART_VERSION) -#error "why are we in nvccfeatures.h if CUDART_VERSION is not defined" -#endif - -#if CUDART_VERSION < 4010 -#error "CUDA versions earlier than 4.1 produce incorrect results for some templated functions in namespaces. Random123 isunsupported. See comments in nvccfeatures.h" -// This test was added in Random123-1.08 (August, 2013) because we -// discovered that Ftype(maxTvalue<T>()) with Ftype=double and -// T=uint64_t in examples/uniform.hpp produces -1 for CUDA4.0 and -// earlier. We can't be sure this bug doesn't also affect invocations -// of other templated functions, e.g., essentially all of Random123. -// Thus, we no longer trust CUDA versions earlier than 4.1 even though -// we had previously tested and timed Random123 with CUDA 3.x and 4.0. -// If you feel lucky or desperate, you can change #error to #warning, but -// please take extra care to be sure that you are getting correct -// results. -#endif - -// nvcc falls through to gcc or msvc. So first define -// a couple of things and then include either gccfeatures.h -// or msvcfeatures.h - -//#ifdef __CUDA_ARCH__ allows Philox32 and Philox64 to be compiled -//for both device and host functions in CUDA by setting compiler flags -//for the device function -#ifdef __CUDA_ARCH__ -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE __device__ -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 1 -#endif - -#ifndef R123_THROW -// No exceptions in CUDA, at least upto 4.0 -#define R123_THROW(x) R123_ASSERT(0) -#endif - -#ifndef R123_ASSERT -#define R123_ASSERT(x) if((x)) ; else asm("trap;") -#endif - -#else // ! __CUDA_ARCH__ -// If we're using nvcc not compiling for the CUDA architecture, -// then we must be compiling for the host. In that case, -// tell the philox code to use the mulhilo64 asm because -// nvcc doesn't grok uint128_t. -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 1 -#endif - -#endif // __CUDA_ARCH__ - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) expr -#endif - -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -#ifndef R123_USE_SSE4_2 -#define R123_USE_SSE4_2 0 -#endif - -#ifndef R123_USE_SSE4_1 -#define R123_USE_SSE4_1 0 -#endif - -#ifndef R123_USE_SSE -#define R123_USE_SSE 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_ULONG_LONG -// uint64_t, which is what we'd get without this, is -// not the same as unsigned long long -#define R123_ULONG_LONG unsigned long long -#endif - -#if defined(__GNUC__) -#include "gccfeatures.h" -#elif defined(_MSC_FULL_VER) -#include "msvcfeatures.h" -#endif - -#endif diff --git a/ext/random123/include/Random123/features/open64features.h b/ext/random123/include/Random123/features/open64features.h deleted file mode 100644 index 8da9f5f5..00000000 --- a/ext/random123/include/Random123/features/open64features.h +++ /dev/null @@ -1,50 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __open64features_dot_hpp -#define __open64features_dot_hpp - -/* The gcc features are mostly right. We just override a few and then include gccfeatures.h */ - -/* Open64 4.2.3 and 4.2.4 accept the __uint128_t code without complaint - but produce incorrect code for 64-bit philox. The MULHILO64_ASM - seems to work fine */ -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 1 -#endif - -#include "gccfeatures.h" - -#endif diff --git a/ext/random123/include/Random123/features/openclfeatures.h b/ext/random123/include/Random123/features/openclfeatures.h deleted file mode 100644 index af03d309..00000000 --- a/ext/random123/include/Random123/features/openclfeatures.h +++ /dev/null @@ -1,89 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __openclfeatures_dot_hpp -#define __openclfeatures_dot_hpp - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline)) -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#define R123_ASSERT(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) expr -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 1 -#endif - -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -// XXX ATI APP SDK 2.4 clBuildProgram SEGVs if one uses uint64_t instead of -// ulong to mul_hi. And gets lots of complaints from stdint.h -// on some machines. -// But these typedefs mean we cannot include stdint.h with -// these headers? Do we need R123_64T, R123_32T, R123_8T? -typedef ulong uint64_t; -typedef uint uint32_t; -typedef uchar uint8_t; -#define UINT64_C(x) ((ulong)(x##UL)) - -#endif diff --git a/ext/random123/include/Random123/features/pgccfeatures.h b/ext/random123/include/Random123/features/pgccfeatures.h deleted file mode 100644 index 18ace135..00000000 --- a/ext/random123/include/Random123/features/pgccfeatures.h +++ /dev/null @@ -1,194 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Copyright (c) 2013, Los Alamos National Security, LLC -All rights reserved. - -Copyright 2013. Los Alamos National Security, LLC. This software was produced -under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National -Laboratory (LANL), which is operated by Los Alamos National Security, LLC for -the U.S. Department of Energy. The U.S. Government has rights to use, -reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS -ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR -ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified -to produce derivative works, such modified software should be clearly marked, -so as not to confuse it with the version available from LANL. -*/ -#ifndef __pgccfeatures_dot_hpp -#define __pgccfeatures_dot_hpp - -#if !defined(__x86_64__) && !defined(__i386__) -# error "This code has only been tested on x86 platforms." -#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task> -{ /* maybe an unbalanced brace will terminate the compilation */ - /* Feel free to try the Random123 library on other architectures by changing - the conditions that reach this error, but you should consider it a - porting exercise and expect to encounter bugs and deficiencies. - Please let the authors know of any successes (or failures). */ -#endif - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static inline -#endif - -/* Found this example in PGI's emmintrin.h. */ -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl __attribute__((__always_inline__)) -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) (expr) -#endif - -/* PGI through 13.2 doesn't appear to support AES-NI. */ -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -/* PGI through 13.2 appears to support MMX, SSE, SSE3, SSE3, SSSE3, SSE4a, and - ABM, but not SSE4.1 or SSE4.2. */ -#ifndef R123_USE_SSE4_2 -#define R123_USE_SSE4_2 0 -#endif - -#ifndef R123_USE_SSE4_1 -#define R123_USE_SSE4_1 0 -#endif - -#ifndef R123_USE_SSE -/* There's no point in trying to compile SSE code in Random123 - unless SSE2 is available. */ -#ifdef __SSE2__ -#define R123_USE_SSE 1 -#else -#define R123_USE_SSE 0 -#endif -#endif - -#ifndef R123_USE_AES_OPENSSL -/* There isn't really a good way to tell at compile time whether - openssl is available. Without a pre-compilation configure-like - tool, it's less error-prone to guess that it isn't available. Add - -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to - play with openssl */ -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU 1 -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 0 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H 0 -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 0 -#endif - -/* emmintrin.h from PGI #includes xmmintrin.h but then complains at link time - about undefined references to _mm_castsi128_ps(__m128i). Why? */ -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 1 -#endif - -#ifndef R123_USE_EMMINTRIN_H -#define R123_USE_EMMINTRIN_H 1 -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H 0 -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 0 -#endif - -#ifndef R123_USE_INTRIN_H -#ifdef __ABM__ -#define R123_USE_INTRIN_H 1 -#else -#define R123_USE_INTRIN_H 0 -#endif -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MULHI_INTRIN -#define R123_USE_MULHILO64_MULHI_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 1 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -/* If you add something, it must go in all the other XXfeatures.hpp - and in ../ut_features.cpp */ -#endif diff --git a/ext/random123/include/Random123/features/sse.h b/ext/random123/include/Random123/features/sse.h deleted file mode 100644 index 3a49ebd8..00000000 --- a/ext/random123/include/Random123/features/sse.h +++ /dev/null @@ -1,280 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef _Random123_sse_dot_h__ -#define _Random123_sse_dot_h__ - -#if R123_USE_SSE - -#if R123_USE_X86INTRIN_H -#include <x86intrin.h> -#endif -#if R123_USE_IA32INTRIN_H -#include <ia32intrin.h> -#endif -#if R123_USE_XMMINTRIN_H -#include <xmmintrin.h> -#endif -#if R123_USE_EMMINTRIN_H -#include <emmintrin.h> -#endif -#if R123_USE_SMMINTRIN_H -#include <smmintrin.h> -#endif -#if R123_USE_WMMINTRIN_H -#include <wmmintrin.h> -#endif -#if R123_USE_INTRIN_H -#include <intrin.h> -#endif -#ifdef __cplusplus -#include <iostream> -#include <limits> -#include <stdexcept> -#endif - -#if R123_USE_ASM_GNU - -/* bit25 of CX tells us whether AES is enabled. */ -R123_STATIC_INLINE int haveAESNI(){ - unsigned int eax, ebx, ecx, edx; - __asm__ __volatile__ ("cpuid": "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : - "a" (1)); - return (ecx>>25) & 1; -} -#elif R123_USE_CPUID_MSVC -R123_STATIC_INLINE int haveAESNI(){ - int CPUInfo[4]; - __cpuid(CPUInfo, 1); - return (CPUInfo[2]>>25)&1; -} -#else /* R123_USE_CPUID_??? */ -#warning "No R123_USE_CPUID_XXX method chosen. haveAESNI will always return false" -R123_STATIC_INLINE int haveAESNI(){ - return 0; -} -#endif /* R123_USE_ASM_GNU || R123_USE_CPUID_MSVC */ - -// There is a lot of annoying and inexplicable variation in the -// SSE intrinsics available in different compilation environments. -// The details seem to depend on the compiler, the version and -// the target architecture. Rather than insisting on -// R123_USE_feature tests for each of these in each of the -// compilerfeatures.h files we just keep the complexity localized -// to here... -#if (defined(__ICC) && __ICC<1210) || (defined(_MSC_VER) && !defined(_WIN64)) -/* Is there an intrinsic to assemble an __m128i from two 64-bit words? - If not, use the 4x32-bit intrisic instead. N.B. It looks like Intel - added _mm_set_epi64x to icc version 12.1 in Jan 2012. -*/ -R123_STATIC_INLINE __m128i _mm_set_epi64x(uint64_t v1, uint64_t v0){ - union{ - uint64_t u64; - uint32_t u32[2]; - } u1, u0; - u1.u64 = v1; - u0.u64 = v0; - return _mm_set_epi32(u1.u32[1], u1.u32[0], u0.u32[1], u0.u32[0]); -} -#endif -/* _mm_extract_lo64 abstracts the task of extracting the low 64-bit - word from an __m128i. The _mm_cvtsi128_si64 intrinsic does the job - on 64-bit platforms. Unfortunately, both MSVC and Open64 fail - assertions in ut_M128.cpp and ut_carray.cpp when we use the - _mm_cvtsi128_si64 intrinsic. (See - https://bugs.open64.net/show_bug.cgi?id=873 for the Open64 bug). - On 32-bit platforms, there's no MOVQ, so there's no intrinsic. - Finally, even if the intrinsic exists, it may be spelled with or - without the 'x'. -*/ -#if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__) -R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){ - union{ - uint64_t u64[2]; - __m128i m; - }u; - _mm_store_si128(&u.m, si); - return u.u64[0]; -} -#elif defined(__llvm__) || defined(__ICC) -R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){ - return (uint64_t)_mm_cvtsi128_si64(si); -} -#else /* GNUC, others */ -/* FWIW, gcc's emmintrin.h has had the 'x' spelling - since at least gcc-3.4.4. The no-'x' spelling showed up - around 4.2. */ -R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){ - return (uint64_t)_mm_cvtsi128_si64x(si); -} -#endif -#if defined(__GNUC__) && __GNUC__ < 4 -/* the cast builtins showed up in gcc4. */ -R123_STATIC_INLINE __m128 _mm_castsi128_ps(__m128i si){ - return (__m128)si; -} -#endif - -#ifdef __cplusplus - -struct r123m128i{ - __m128i m; -#if R123_USE_CXX11_UNRESTRICTED_UNIONS - // C++98 forbids a union member from having *any* constructors. - // C++11 relaxes this, and allows union members to have constructors - // as long as there is a "trivial" default construtor. So in C++11 - // we can provide a r123m128i constructor with an __m128i argument, and still - // have the default (and hence trivial) default constructor. - r123m128i() = default; - r123m128i(__m128i _m): m(_m){} -#endif - r123m128i& operator=(const __m128i& rhs){ m=rhs; return *this;} - r123m128i& operator=(R123_ULONG_LONG n){ m = _mm_set_epi64x(0, n); return *this;} -#if R123_USE_CXX11_EXPLICIT_CONVERSIONS - // With C++11 we can attach explicit to the bool conversion operator - // to disambiguate undesired promotions. For g++, this works - // only in 4.5 and above. - explicit operator bool() const {return _bool();} -#else - // Pre-C++11, we have to do something else. Google for the "safe bool" - // idiom for other ideas... - operator const void*() const{return _bool()?this:0;} -#endif - operator __m128i() const {return m;} - -private: -#if R123_USE_SSE4_1 - bool _bool() const{ return !_mm_testz_si128(m,m); } -#else - bool _bool() const{ return 0xf != _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(m, _mm_setzero_si128()))); } -#endif -}; - -R123_STATIC_INLINE r123m128i& operator++(r123m128i& v){ - __m128i& c = v.m; - __m128i zeroone = _mm_set_epi64x(R123_64BIT(0), R123_64BIT(1)); - c = _mm_add_epi64(c, zeroone); - //return c; -#if R123_USE_SSE4_1 - __m128i zerofff = _mm_set_epi64x(0, ~(R123_64BIT(0))); - if( R123_BUILTIN_EXPECT(_mm_testz_si128(c,zerofff), 0) ){ - __m128i onezero = _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0)); - c = _mm_add_epi64(c, onezero); - } -#else - unsigned mask = _mm_movemask_ps( _mm_castsi128_ps(_mm_cmpeq_epi32(c, _mm_setzero_si128()))); - // The low two bits of mask are 11 iff the low 64 bits of - // c are zero. - if( R123_BUILTIN_EXPECT((mask&0x3) == 0x3, 0) ){ - __m128i onezero = _mm_set_epi64x(1,0); - c = _mm_add_epi64(c, onezero); - } -#endif - return v; -} - -R123_STATIC_INLINE r123m128i& operator+=(r123m128i& lhs, R123_ULONG_LONG n){ - __m128i c = lhs.m; - __m128i incr128 = _mm_set_epi64x(0, n); - c = _mm_add_epi64(c, incr128); - // return c; // NO CARRY! - - int64_t lo64 = _mm_extract_lo64(c); - if((uint64_t)lo64 < n) - c = _mm_add_epi64(c, _mm_set_epi64x(1,0)); - lhs.m = c; - return lhs; -} - -// We need this one because it's present, but never used in r123array1xm128i::incr -R123_STATIC_INLINE bool operator<=(R123_ULONG_LONG, const r123m128i &){ - throw std::runtime_error("operator<=(unsigned long long, r123m128i) is unimplemented.");} - -// The comparisons aren't implemented, but if we leave them out, and -// somebody writes, e.g., M1 < M2, the compiler will do an implicit -// conversion through void*. Sigh... -R123_STATIC_INLINE bool operator<(const r123m128i&, const r123m128i&){ - throw std::runtime_error("operator<(r123m128i, r123m128i) is unimplemented.");} -R123_STATIC_INLINE bool operator<=(const r123m128i&, const r123m128i&){ - throw std::runtime_error("operator<=(r123m128i, r123m128i) is unimplemented.");} -R123_STATIC_INLINE bool operator>(const r123m128i&, const r123m128i&){ - throw std::runtime_error("operator>(r123m128i, r123m128i) is unimplemented.");} -R123_STATIC_INLINE bool operator>=(const r123m128i&, const r123m128i&){ - throw std::runtime_error("operator>=(r123m128i, r123m128i) is unimplemented.");} - -R123_STATIC_INLINE bool operator==(const r123m128i &lhs, const r123m128i &rhs){ - return 0xf==_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs, rhs))); } -R123_STATIC_INLINE bool operator!=(const r123m128i &lhs, const r123m128i &rhs){ - return !(lhs==rhs);} -R123_STATIC_INLINE bool operator==(R123_ULONG_LONG lhs, const r123m128i &rhs){ - r123m128i LHS; LHS.m=_mm_set_epi64x(0, lhs); return LHS == rhs; } -R123_STATIC_INLINE bool operator!=(R123_ULONG_LONG lhs, const r123m128i &rhs){ - return !(lhs==rhs);} -R123_STATIC_INLINE std::ostream& operator<<(std::ostream& os, const r123m128i& m){ - union{ - uint64_t u64[2]; - __m128i m; - }u; - _mm_storeu_si128(&u.m, m.m); - return os << u.u64[0] << " " << u.u64[1]; -} - -R123_STATIC_INLINE std::istream& operator>>(std::istream& is, r123m128i& m){ - uint64_t u64[2]; - is >> u64[0] >> u64[1]; - m.m = _mm_set_epi64x(u64[1], u64[0]); - return is; -} - -template<typename T> inline T assemble_from_u32(uint32_t *p32); // forward declaration - -template <> -inline r123m128i assemble_from_u32<r123m128i>(uint32_t *p32){ - r123m128i ret; - ret.m = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]); - return ret; -} - -#else - -typedef struct { - __m128i m; -} r123m128i; - -#endif /* __cplusplus */ - -#else /* !R123_USE_SSE */ -R123_STATIC_INLINE int haveAESNI(){ - return 0; -} -#endif /* R123_USE_SSE */ - -#endif /* _Random123_sse_dot_h__ */ diff --git a/ext/random123/include/Random123/features/sunprofeatures.h b/ext/random123/include/Random123/features/sunprofeatures.h deleted file mode 100644 index c9cdc00f..00000000 --- a/ext/random123/include/Random123/features/sunprofeatures.h +++ /dev/null @@ -1,172 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __sunprofeatures_dot_hpp -#define __sunprofeatures_dot_hpp - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) expr -#endif - -// The basic idiom is: -// #ifndef R123_SOMETHING -// #if some condition -// #define R123_SOMETHING 1 -// #else -// #define R123_SOMETHING 0 -// #endif -// #endif -// This idiom allows an external user to override any decision -// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHINE=0 - -// An alternative idiom is: -// #ifndef R123_SOMETHING -// #define R123_SOMETHING (some boolean expression) -// #endif -// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE -// pp-symbols. - -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -#ifndef R123_USE_SSE4_2 -#define R123_USE_SSE4_2 0 -#endif - -#ifndef R123_USE_SSE4_1 -#define R123_USE_SSE4_1 0 -#endif - -#ifndef R123_USE_SSE -#define R123_USE_SSE 0 -#endif - -#ifndef R123_USE_AES_OPENSSL -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU 0 -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 0 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H 0 -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 0 -#endif - -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 0 -#endif - -#ifndef R123_USE_EMMINTRIN_H -#define R123_USE_EMMINTRIN_H 0 -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H 0 -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 0 -#endif - -#ifndef R123_USE_INTRIN_H -#define R123_USE_INTRIN_H 0 -#endif - -#ifndef R123_USE_MULHILO16_ASM -#define R123_USE_MULHILO16_ASM 0 -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef R123_USE_PHILOX_64BIT -#define R123_USE_PHILOX_64BIT 0 -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -// If you add something, it must go in all the other XXfeatures.hpp -// and in ../ut_features.cpp -#endif diff --git a/ext/random123/include/Random123/features/xlcfeatures.h b/ext/random123/include/Random123/features/xlcfeatures.h deleted file mode 100644 index ccb98ee5..00000000 --- a/ext/random123/include/Random123/features/xlcfeatures.h +++ /dev/null @@ -1,210 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Copyright (c) 2013, Los Alamos National Security, LLC -All rights reserved. - -Copyright 2013. Los Alamos National Security, LLC. This software was produced -under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National -Laboratory (LANL), which is operated by Los Alamos National Security, LLC for -the U.S. Department of Energy. The U.S. Government has rights to use, -reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS -ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR -ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified -to produce derivative works, such modified software should be clearly marked, -so as not to confuse it with the version available from LANL. -*/ -#ifndef __xlcfeatures_dot_hpp -#define __xlcfeatures_dot_hpp - -#if !defined(__x86_64__) && !defined(__i386__) && !defined(__powerpc__) -# error "This code has only been tested on x86 and PowerPC platforms." -#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task> -{ /* maybe an unbalanced brace will terminate the compilation */ - /* Feel free to try the Random123 library on other architectures by changing - the conditions that reach this error, but you should consider it a - porting exercise and expect to encounter bugs and deficiencies. - Please let the authors know of any successes (or failures). */ -#endif - -#ifdef __cplusplus -/* builtins are automatically available to xlc. To use them with xlc++, - one must include builtins.h. c.f - http://publib.boulder.ibm.com/infocenter/cellcomp/v101v121/index.jsp?topic=/com.ibm.xlcpp101.cell.doc/compiler_ref/compiler_builtins.html -*/ -#include <builtins.h> -#endif - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl __attribute__((__always_inline__)) -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely) -#endif - -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -#ifndef R123_USE_SSE4_2 -#define R123_USE_SSE4_2 0 -#endif - -#ifndef R123_USE_SSE4_1 -#define R123_USE_SSE4_1 0 -#endif - -#ifndef R123_USE_SSE -#define R123_USE_SSE 0 -#endif - -#ifndef R123_USE_AES_OPENSSL -/* There isn't really a good way to tell at compile time whether - openssl is available. Without a pre-compilation configure-like - tool, it's less error-prone to guess that it isn't available. Add - -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to - play with openssl */ -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU 1 -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 0 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H 0 -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 0 -#endif - -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 0 -#endif - -#ifndef R123_USE_EMMINTRIN_H -#define R123_USE_EMMINTRIN_H 0 -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H 0 -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 0 -#endif - -#ifndef R123_USE_INTRIN_H -#ifdef __ABM__ -#define R123_USE_INTRIN_H 1 -#else -#define R123_USE_INTRIN_H 0 -#endif -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MULHI_INTRIN -#if (defined(__powerpc64__)) -#define R123_USE_MULHILO64_MULHI_INTRIN 1 -#else -#define R123_USE_MULHILO64_MULHI_INTRIN 0 -#endif -#endif - -#ifndef R123_MULHILO64_MULHI_INTRIN -#define R123_MULHILO64_MULHI_INTRIN __mulhdu -#endif - -#ifndef R123_USE_MULHILO32_MULHI_INTRIN -#define R123_USE_MULHILO32_MULHI_INTRIN 0 -#endif - -#ifndef R123_MULHILO32_MULHI_INTRIN -#define R123_MULHILO32_MULHI_INTRIN __mulhwu -#endif - -#ifndef R123_USE_MULHILO64_ASM -#if defined(__powerpc64__) -#define R123_USE_MULHILO64_ASM (1 /*defined(__powerpc64__)*/ && !(R123_USE_MULHILO64_MULHI_INTRIN)) -#else -#define R123_USE_MULHILO64_ASM (0 /*defined(__powerpc64__)*/ && !(R123_USE_MULHILO64_MULHI_INTRIN)) -#endif -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -/* If you add something, it must go in all the other XXfeatures.hpp - and in ../ut_features.cpp */ -#endif diff --git a/ext/random123/include/Random123/gsl_microrng.h b/ext/random123/include/Random123/gsl_microrng.h deleted file mode 100644 index 4f094121..00000000 --- a/ext/random123/include/Random123/gsl_microrng.h +++ /dev/null @@ -1,136 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __r123_gslmicrorng_dot_h__ -#define __r123_gslmicrorng_dot_h__ - - -#include <gsl/gsl_rng.h> -#include <string.h> - -/** The macro: GSL_MICRORNG(NAME, CBRNGNAME) is the GSL - analog analog of the C++ r123::MicroURNG template. It declares a gsl_rng - type named gsl_rng_NAME which uses the underlying CBRNGNAME - and can be invoked a limited number of times between calls to NAME_reset. - - When the underlying CBRNG's \c ctr_t is an \ref arrayNxW "r123arrayNxW", - and the gsl_rng_NAME may called up to \c N*2^32 times - between calls to \c NAME_reset. - - \c NAME_reset takes a gsl_rng_NAME type, a counter and a key as arguments. - It restarts the micro-rng with a new base counter and key. - - Note that you must call NAME_reset before the first use - of a gsl_rng. NAME_reset is not called automatically by - gsl_rng_alloc(). - - @code - #include <Random123/threefry.h> - #include <Random123/gsl_microrng.h> // this file - GSL_MICRORNG(microcbrng, threefry4x64, 20) // creates gsl_rng_microcbrng - - int main(int argc, char** argv) { - gsl_rng *r = gsl_rng_alloc(gsl_rng_microcbrng); - threefry4x64_ctr_t c = {{}}; - threefry4x64_key_t k = {{}}; - - for (...) { - c.v[0] = ??; // some application variable - microcbrng_reset(r, c, k); - for (...) { - // gaussian calls r several times. It is safe for - // r to be used upto 2^20 times in this loop - something[i] = gsl_ran_gaussian(r, 1.5); - } - } - } - @endcode - -*/ - -#define GSL_MICRORNG(NAME, CBRNGNAME) \ -const gsl_rng_type *gsl_rng_##NAME; \ - \ -typedef struct{ \ - CBRNGNAME##_ctr_t ctr; \ - CBRNGNAME##_ctr_t r; \ - CBRNGNAME##_key_t key; \ - R123_ULONG_LONG n; \ - int elem; \ -} NAME##_state; \ - \ -static unsigned long int NAME##_get(void *vstate){ \ - NAME##_state *st = (NAME##_state *)vstate; \ - const int N=sizeof(st->ctr.v)/sizeof(st->ctr.v[0]); \ - if( st->elem == 0 ){ \ - CBRNGNAME##_ctr_t c = st->ctr; \ - c.v[N-1] |= st->n<<(R123_W(CBRNGNAME##_ctr_t)-32); \ - st->n++; \ - st->r = CBRNGNAME(c, st->key); \ - st->elem = N; \ - } \ - return 0xffffffff & st->r.v[--st->elem]; \ -} \ - \ -static double \ -NAME##_get_double (void * vstate) \ -{ \ - return NAME##_get (vstate)/4294967296.; \ -} \ - \ -static void NAME##_set(void *vstate, unsigned long int s){ \ - NAME##_state *st = (NAME##_state *)vstate; \ - (void)s; /* ignored */ \ - st->elem = 0; \ - st->n = ~0; /* will abort if _reset is not called */ \ -} \ - \ -static const gsl_rng_type NAME##_type = { \ - #NAME, \ - 0xffffffffUL, \ - 0, \ - sizeof(NAME##_state), \ - &NAME##_set, \ - &NAME##_get, \ - &NAME##_get_double \ -}; \ - \ -R123_STATIC_INLINE void NAME##_reset(const gsl_rng* gr, CBRNGNAME##_ctr_t c, CBRNGNAME##_key_t k) { \ - NAME##_state* state = (NAME##_state *)gr->state; \ - state->ctr = c; \ - state->key = k; \ - state->n = 0; \ - state->elem = 0; \ -} \ - \ -const gsl_rng_type *gsl_rng_##NAME = &NAME##_type - -#endif diff --git a/ext/random123/include/Random123/philox.h b/ext/random123/include/Random123/philox.h deleted file mode 100644 index 7bf4d195..00000000 --- a/ext/random123/include/Random123/philox.h +++ /dev/null @@ -1,493 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef _philox_dot_h_ -#define _philox_dot_h_ - -/** \cond HIDDEN_FROM_DOXYGEN */ - -#include "features/compilerfeatures.h" -#include "array.h" - - -/* -// Macros _Foo_tpl are code generation 'templates' They define -// inline functions with names obtained by mangling Foo and the -// macro arguments. E.g., -// _mulhilo_tpl(32, uint32_t, uint64_t) -// expands to a definition of: -// mulhilo32(uint32_t, uint32_t, uint32_t *, uint32_t *) -// We then 'instantiate the template' to define -// several different functions, e.g., -// mulhilo32 -// mulhilo64 -// These functions will be visible to user code, and may -// also be used later in subsequent templates and definitions. - -// A template for mulhilo using a temporary of twice the word-width. -// Gcc figures out that this can be reduced to a single 'mul' instruction, -// despite the apparent use of double-wide variables, shifts, etc. It's -// obviously not guaranteed that all compilers will be that smart, so -// other implementations might be preferable, e.g., using an intrinsic -// or an asm block. On the other hand, for 32-bit multiplies, -// this *is* perfectly standard C99 - any C99 compiler should -// understand it and produce correct code. For 64-bit multiplies, -// it's only usable if the compiler recognizes that it can do -// arithmetic on a 128-bit type. That happens to be true for gcc on -// x86-64, and powerpc64 but not much else. -*/ -#define _mulhilo_dword_tpl(W, Word, Dword) \ -R123_CUDA_DEVICE R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){ \ - Dword product = ((Dword)a)*((Dword)b); \ - *hip = product>>W; \ - return (Word)product; \ -} - -/* -// A template for mulhilo using gnu-style asm syntax. -// INSN can be "mulw", "mull" or "mulq". -// FIXME - porting to other architectures, we'll need still-more conditional -// branching here. Note that intrinsics are usually preferable. -*/ -#ifdef __powerpc__ -#define _mulhilo_asm_tpl(W, Word, INSN) \ -R123_STATIC_INLINE Word mulhilo##W(Word ax, Word b, Word *hip){ \ - Word dx = 0; \ - __asm__("\n\t" \ - INSN " %0,%1,%2\n\t" \ - : "=r"(dx) \ - : "r"(b), "r"(ax) \ - ); \ - *hip = dx; \ - return ax*b; \ -} -#else -#define _mulhilo_asm_tpl(W, Word, INSN) \ -R123_STATIC_INLINE Word mulhilo##W(Word ax, Word b, Word *hip){ \ - Word dx; \ - __asm__("\n\t" \ - INSN " %2\n\t" \ - : "=a"(ax), "=d"(dx) \ - : "r"(b), "0"(ax) \ - ); \ - *hip = dx; \ - return ax; \ -} -#endif /* __powerpc__ */ - -/* -// A template for mulhilo using MSVC-style intrinsics -// For example,_umul128 is an msvc intrinsic, c.f. -// http://msdn.microsoft.com/en-us/library/3dayytw9.aspx -*/ -#define _mulhilo_msvc_intrin_tpl(W, Word, INTRIN) \ -R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){ \ - return INTRIN(a, b, hip); \ -} - -/* N.B. This really should be called _mulhilo_mulhi_intrin. It just - happens that CUDA was the first time we used the idiom. */ -#define _mulhilo_cuda_intrin_tpl(W, Word, INTRIN) \ -R123_CUDA_DEVICE R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, R123_METAL_THREAD_ADDRESS_SPACE Word* hip){ \ - *hip = INTRIN(a, b); \ - return a*b; \ -} - -/* -// A template for mulhilo using only word-size operations and -// C99 operators (no adc, no mulhi). It -// requires four multiplies and a dozen or so shifts, adds -// and tests. It's *SLOW*. It can be used to -// implement philoxNx32 on platforms that completely lack -// 64-bit types, e.g., Metal. -// On 32-bit platforms, it could be used to -// implement philoxNx64, but on such platforms both the philoxNx32 -// and the threefryNx64 cbrngs are going to have much better -// performance. It is enabled below by R123_USE_MULHILO64_C99, -// but that is currently (Feb 2019) only set by -// features/metalfeatures.h headers. It can, of course, be -// set with a compile-time -D option. -*/ -#define _mulhilo_c99_tpl(W, Word) \ -R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, R123_METAL_THREAD_ADDRESS_SPACE Word *hip){ \ - const unsigned WHALF = W/2; \ - const Word LOMASK = ((((Word)1)<<WHALF)-1); \ - Word lo = a*b; /* full low multiply */ \ - Word ahi = a>>WHALF; \ - Word alo = a& LOMASK; \ - Word bhi = b>>WHALF; \ - Word blo = b& LOMASK; \ - \ - Word ahbl = ahi*blo; \ - Word albh = alo*bhi; \ - \ - Word ahbl_albh = ((ahbl&LOMASK) + (albh&LOMASK)); \ - Word hi = ahi*bhi + (ahbl>>WHALF) + (albh>>WHALF); \ - hi += ahbl_albh >> WHALF; /* carry from the sum of lo(ahbl) + lo(albh) ) */ \ - /* carry from the sum with alo*blo */ \ - hi += ((lo >> WHALF) < (ahbl_albh&LOMASK)); \ - *hip = hi; \ - return lo; \ -} - -/* -// A template for mulhilo on a platform that can't do it -// We could put a C version here, but is it better to run *VERY* -// slowly or to just stop and force the user to find another CBRNG? -*/ -#define _mulhilo_fail_tpl(W, Word) \ -R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word *hip){ \ - R123_STATIC_ASSERT(0, "mulhilo" #W " is not implemented on this machine\n"); \ -} - -/* -// N.B. There's an MSVC intrinsic called _emul, -// which *might* compile into better code than -// _mulhilo_dword_tpl -*/ -#if R123_USE_MULHILO32_ASM -#ifdef __powerpc__ -_mulhilo_asm_tpl(32, uint32_t, "mulhwu") -#else -_mulhilo_asm_tpl(32, uint32_t, "mull") -#endif /* __powerpc__ */ -#else -#if R123_USE_64BIT -_mulhilo_dword_tpl(32, uint32_t, uint64_t) -#elif R123_USE_MULHILO32_MULHI_INTRIN -_mulhilo_cuda_intrin_tpl(32, uint32_t, R123_MULHILO32_MULHI_INTRIN) -#else -_mulhilo_c99_tpl(32, uint32_t) -#endif -#endif - -#if R123_USE_PHILOX_64BIT -#if R123_USE_MULHILO64_ASM -#ifdef __powerpc64__ -_mulhilo_asm_tpl(64, uint64_t, "mulhdu") -#else -_mulhilo_asm_tpl(64, uint64_t, "mulq") -#endif /* __powerpc64__ */ -#elif R123_USE_MULHILO64_MSVC_INTRIN -_mulhilo_msvc_intrin_tpl(64, uint64_t, _umul128) -#elif R123_USE_MULHILO64_CUDA_INTRIN -_mulhilo_cuda_intrin_tpl(64, uint64_t, __umul64hi) -#elif R123_USE_MULHILO64_OPENCL_INTRIN -_mulhilo_cuda_intrin_tpl(64, uint64_t, mul_hi) -#elif R123_USE_MULHILO64_MULHI_INTRIN -_mulhilo_cuda_intrin_tpl(64, uint64_t, R123_MULHILO64_MULHI_INTRIN) -#elif R123_USE_GNU_UINT128 -_mulhilo_dword_tpl(64, uint64_t, __uint128_t) -#elif R123_USE_MULHILO64_C99 -_mulhilo_c99_tpl(64, uint64_t) -#else -_mulhilo_fail_tpl(64, uint64_t) -#endif -#endif - -/* -// The multipliers and Weyl constants are "hard coded". -// To change them, you can #define them with different -// values before #include-ing this file. -// This isn't terribly elegant, but it works for C as -// well as C++. A nice C++-only solution would be to -// use template parameters in the style of <random> -*/ -#ifndef PHILOX_M2x64_0 -#define PHILOX_M2x64_0 R123_64BIT(0xD2B74407B1CE6E93) -#endif - -#ifndef PHILOX_M4x64_0 -#define PHILOX_M4x64_0 R123_64BIT(0xD2E7470EE14C6C93) -#endif - -#ifndef PHILOX_M4x64_1 -#define PHILOX_M4x64_1 R123_64BIT(0xCA5A826395121157) -#endif - -#ifndef PHILOX_M2x32_0 -#define PHILOX_M2x32_0 ((uint32_t)0xd256d193) -#endif - -#ifndef PHILOX_M4x32_0 -#define PHILOX_M4x32_0 ((uint32_t)0xD2511F53) -#endif -#ifndef PHILOX_M4x32_1 -#define PHILOX_M4x32_1 ((uint32_t)0xCD9E8D57) -#endif - -#ifndef PHILOX_W64_0 -#define PHILOX_W64_0 R123_64BIT(0x9E3779B97F4A7C15) /* golden ratio */ -#endif -#ifndef PHILOX_W64_1 -#define PHILOX_W64_1 R123_64BIT(0xBB67AE8584CAA73B) /* sqrt(3)-1 */ -#endif - -#ifndef PHILOX_W32_0 -#define PHILOX_W32_0 ((uint32_t)0x9E3779B9) -#endif -#ifndef PHILOX_W32_1 -#define PHILOX_W32_1 ((uint32_t)0xBB67AE85) -#endif - -/** \endcond */ -#ifndef PHILOX2x32_DEFAULT_ROUNDS -#define PHILOX2x32_DEFAULT_ROUNDS 10 -#endif - -#ifndef PHILOX2x64_DEFAULT_ROUNDS -#define PHILOX2x64_DEFAULT_ROUNDS 10 -#endif - -#ifndef PHILOX4x32_DEFAULT_ROUNDS -#define PHILOX4x32_DEFAULT_ROUNDS 10 -#endif - -#ifndef PHILOX4x64_DEFAULT_ROUNDS -#define PHILOX4x64_DEFAULT_ROUNDS 10 -#endif -/** \cond HIDDEN_FROM_DOXYGEN */ - -/* The ignored fourth argument allows us to instantiate the - same macro regardless of N. */ -#define _philox2xWround_tpl(W, T) \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(struct r123array2x##W _philox2x##W##round(struct r123array2x##W ctr, struct r123array1x##W key)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array2x##W _philox2x##W##round(struct r123array2x##W ctr, struct r123array1x##W key){ \ - T hi; \ - T lo = mulhilo##W(PHILOX_M2x##W##_0, ctr.v[0], &hi); \ - struct r123array2x##W out = {{hi^key.v[0]^ctr.v[1], lo}}; \ - return out; \ -} -#define _philox2xWbumpkey_tpl(W) \ -R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array1x##W _philox2x##W##bumpkey( struct r123array1x##W key) { \ - key.v[0] += PHILOX_W##W##_0; \ - return key; \ -} - -#define _philox4xWround_tpl(W, T) \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(struct r123array4x##W _philox4x##W##round(struct r123array4x##W ctr, struct r123array2x##W key)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array4x##W _philox4x##W##round(struct r123array4x##W ctr, struct r123array2x##W key){ \ - T hi0; \ - T hi1; \ - T lo0 = mulhilo##W(PHILOX_M4x##W##_0, ctr.v[0], &hi0); \ - T lo1 = mulhilo##W(PHILOX_M4x##W##_1, ctr.v[2], &hi1); \ - struct r123array4x##W out = {{hi1^ctr.v[1]^key.v[0], lo1, \ - hi0^ctr.v[3]^key.v[1], lo0}}; \ - return out; \ -} - -#define _philox4xWbumpkey_tpl(W) \ -R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array2x##W _philox4x##W##bumpkey( struct r123array2x##W key) { \ - key.v[0] += PHILOX_W##W##_0; \ - key.v[1] += PHILOX_W##W##_1; \ - return key; \ -} - -/** \endcond */ -#define _philoxNxW_tpl(N, Nhalf, W, T) \ -/** @ingroup PhiloxNxW */ \ -enum r123_enum_philox##N##x##W { philox##N##x##W##_rounds = PHILOX##N##x##W##_DEFAULT_ROUNDS }; \ -typedef struct r123array##N##x##W philox##N##x##W##_ctr_t; \ -typedef struct r123array##Nhalf##x##W philox##N##x##W##_key_t; \ -typedef struct r123array##Nhalf##x##W philox##N##x##W##_ukey_t; \ -R123_CUDA_DEVICE R123_STATIC_INLINE philox##N##x##W##_key_t philox##N##x##W##keyinit(philox##N##x##W##_ukey_t uk) { return uk; } \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(philox##N##x##W##_ctr_t philox##N##x##W##_R(unsigned int R, philox##N##x##W##_ctr_t ctr, philox##N##x##W##_key_t key)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE philox##N##x##W##_ctr_t philox##N##x##W##_R(unsigned int R, philox##N##x##W##_ctr_t ctr, philox##N##x##W##_key_t key) { \ - R123_ASSERT(R<=16); \ - if(R>0){ ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>1){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>2){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>3){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>4){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>5){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>6){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>7){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>8){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>9){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>10){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>11){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>12){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>13){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>14){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - if(R>15){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \ - return ctr; \ -} - -_philox2xWbumpkey_tpl(32) -_philox4xWbumpkey_tpl(32) -_philox2xWround_tpl(32, uint32_t) /* philox2x32round */ -_philox4xWround_tpl(32, uint32_t) /* philo4x32round */ - -_philoxNxW_tpl(2, 1, 32, uint32_t) /* philox2x32bijection */ -_philoxNxW_tpl(4, 2, 32, uint32_t) /* philox4x32bijection */ -#if R123_USE_PHILOX_64BIT -/** \cond HIDDEN_FROM_DOXYGEN */ -_philox2xWbumpkey_tpl(64) -_philox4xWbumpkey_tpl(64) -_philox2xWround_tpl(64, uint64_t) /* philo2x64round */ -_philox4xWround_tpl(64, uint64_t) /* philo4x64round */ -/** \endcond */ -_philoxNxW_tpl(2, 1, 64, uint64_t) /* philox2x64bijection */ -_philoxNxW_tpl(4, 2, 64, uint64_t) /* philox4x64bijection */ -#endif /* R123_USE_PHILOX_64BIT */ - -#define philox2x32(c,k) philox2x32_R(philox2x32_rounds, c, k) -#define philox4x32(c,k) philox4x32_R(philox4x32_rounds, c, k) -#if R123_USE_PHILOX_64BIT -#define philox2x64(c,k) philox2x64_R(philox2x64_rounds, c, k) -#define philox4x64(c,k) philox4x64_R(philox4x64_rounds, c, k) -#endif /* R123_USE_PHILOX_64BIT */ - -#if defined(__cplusplus) - -#define _PhiloxNxW_base_tpl(CType, KType, N, W) \ -namespace r123{ \ -template<unsigned int ROUNDS> \ -struct Philox##N##x##W##_R{ \ - typedef CType ctr_type; \ - typedef KType key_type; \ - typedef KType ukey_type; \ - static const R123_METAL_CONSTANT_ADDRESS_SPACE unsigned int rounds=ROUNDS; \ - inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){ \ - R123_STATIC_ASSERT(ROUNDS<=16, "philox is only unrolled up to 16 rounds\n"); \ - return philox##N##x##W##_R(ROUNDS, ctr, key); \ - } \ -}; \ -typedef Philox##N##x##W##_R<philox##N##x##W##_rounds> Philox##N##x##W; \ - } // namespace r123 - -_PhiloxNxW_base_tpl(r123array2x32, r123array1x32, 2, 32) // Philox2x32_R<R> -_PhiloxNxW_base_tpl(r123array4x32, r123array2x32, 4, 32) // Philox4x32_R<R> -#if R123_USE_PHILOX_64BIT -_PhiloxNxW_base_tpl(r123array2x64, r123array1x64, 2, 64) // Philox2x64_R<R> -_PhiloxNxW_base_tpl(r123array4x64, r123array2x64, 4, 64) // Philox4x64_R<R> -#endif - -/* The _tpl macros don't quite work to do string-pasting inside comments. - so we just write out the boilerplate documentation four times... */ - -/** -@defgroup PhiloxNxW Philox Classes and Typedefs - -The PhiloxNxW classes export the member functions, typedefs and -operator overloads required by a @ref CBRNG "CBRNG" class. - -As described in -<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers: As Easy as 1, 2, 3</i> </a>. -The Philox family of counter-based RNGs use integer multiplication, xor and permutation of W-bit words -to scramble its N-word input key. Philox is a mnemonic for Product HI LO Xor). - - -@class r123::Philox2x32_R -@ingroup PhiloxNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Philox round -function will be applied. - -As of November 2011, the authors know of no statistical flaws with -ROUNDS=6 or more for Philox2x32. - -@typedef r123::Philox2x32 -@ingroup PhiloxNxW - Philox2x32 is equivalent to Philox2x32_R<10>. With 10 rounds, - Philox2x32 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. - - - -@class r123::Philox2x64_R -@ingroup PhiloxNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Philox round -function will be applied. - -As of September 2011, the authors know of no statistical flaws with -ROUNDS=6 or more for Philox2x64. - -@typedef r123::Philox2x64 -@ingroup PhiloxNxW - Philox2x64 is equivalent to Philox2x64_R<10>. With 10 rounds, - Philox2x64 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. - - - -@class r123::Philox4x32_R -@ingroup PhiloxNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Philox round -function will be applied. - -In November 2011, the authors recorded some suspicious p-values (approximately 1.e-7) from -some very long (longer than the default BigCrush length) SimpPoker tests. Despite -the fact that even longer tests reverted to "passing" p-values, a cloud remains over -Philox4x32 with 7 rounds. The authors know of no statistical flaws with -ROUNDS=8 or more for Philox4x32. - -@typedef r123::Philox4x32 -@ingroup PhiloxNxW - Philox4x32 is equivalent to Philox4x32_R<10>. With 10 rounds, - Philox4x32 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. - - - -@class r123::Philox4x64_R -@ingroup PhiloxNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Philox round -function will be applied. - -As of September 2011, the authors know of no statistical flaws with -ROUNDS=7 or more for Philox4x64. - -@typedef r123::Philox4x64 -@ingroup PhiloxNxW - Philox4x64 is equivalent to Philox4x64_R<10>. With 10 rounds, - Philox4x64 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. -*/ - -#endif /* __cplusplus */ - -#endif /* _philox_dot_h_ */ diff --git a/ext/random123/include/Random123/threefry.h b/ext/random123/include/Random123/threefry.h deleted file mode 100644 index 390ceffe..00000000 --- a/ext/random123/include/Random123/threefry.h +++ /dev/null @@ -1,870 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef _threefry_dot_h_ -#define _threefry_dot_h_ -#include "features/compilerfeatures.h" -#include "array.h" - -/** \cond HIDDEN_FROM_DOXYGEN */ -/* Significant parts of this file were copied from - from: - Skein_FinalRnd/ReferenceImplementation/skein.h - Skein_FinalRnd/ReferenceImplementation/skein_block.c - - in http://csrc.nist.gov/groups/ST/hash/sha-3/Round3/documents/Skein_FinalRnd.zip - - This file has been modified so that it may no longer perform its originally - intended function. If you're looking for a Skein or Threefish source code, - please consult the original file. - - The original file had the following header: -************************************************************************** -** -** Interface declarations and internal definitions for Skein hashing. -** -** Source code author: Doug Whiting, 2008. -** -** This algorithm and source code is released to the public domain. -** -*************************************************************************** - -*/ - -/* See comment at the top of philox.h for the macro pre-process - strategy. */ - -/* Rotation constants: */ -enum r123_enum_threefry64x4 { - /* These are the R_256 constants from the Threefish reference sources - with names changed to R_64x4... */ - R_64x4_0_0=14, R_64x4_0_1=16, - R_64x4_1_0=52, R_64x4_1_1=57, - R_64x4_2_0=23, R_64x4_2_1=40, - R_64x4_3_0= 5, R_64x4_3_1=37, - R_64x4_4_0=25, R_64x4_4_1=33, - R_64x4_5_0=46, R_64x4_5_1=12, - R_64x4_6_0=58, R_64x4_6_1=22, - R_64x4_7_0=32, R_64x4_7_1=32 -}; - -enum r123_enum_threefry64x2 { - /* - // Output from skein_rot_search: (srs64_B64-X1000) - // Random seed = 1. BlockSize = 128 bits. sampleCnt = 1024. rounds = 8, minHW_or=57 - // Start: Tue Mar 1 10:07:48 2011 - // rMin = 0.136. #0325[*15] [CRC=455A682F. hw_OR=64. cnt=16384. blkSize= 128].format - */ - R_64x2_0_0=16, - R_64x2_1_0=42, - R_64x2_2_0=12, - R_64x2_3_0=31, - R_64x2_4_0=16, - R_64x2_5_0=32, - R_64x2_6_0=24, - R_64x2_7_0=21 - /* 4 rounds: minHW = 4 [ 4 4 4 4 ] - // 5 rounds: minHW = 8 [ 8 8 8 8 ] - // 6 rounds: minHW = 16 [ 16 16 16 16 ] - // 7 rounds: minHW = 32 [ 32 32 32 32 ] - // 8 rounds: minHW = 64 [ 64 64 64 64 ] - // 9 rounds: minHW = 64 [ 64 64 64 64 ] - //10 rounds: minHW = 64 [ 64 64 64 64 ] - //11 rounds: minHW = 64 [ 64 64 64 64 ] */ -}; - -enum r123_enum_threefry32x4 { - /* Output from skein_rot_search: (srs-B128-X5000.out) - // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28 - // Start: Mon Aug 24 22:41:36 2009 - // ... - // rMin = 0.472. #0A4B[*33] [CRC=DD1ECE0F. hw_OR=31. cnt=16384. blkSize= 128].format */ - R_32x4_0_0=10, R_32x4_0_1=26, - R_32x4_1_0=11, R_32x4_1_1=21, - R_32x4_2_0=13, R_32x4_2_1=27, - R_32x4_3_0=23, R_32x4_3_1= 5, - R_32x4_4_0= 6, R_32x4_4_1=20, - R_32x4_5_0=17, R_32x4_5_1=11, - R_32x4_6_0=25, R_32x4_6_1=10, - R_32x4_7_0=18, R_32x4_7_1=20 - - /* 4 rounds: minHW = 3 [ 3 3 3 3 ] - // 5 rounds: minHW = 7 [ 7 7 7 7 ] - // 6 rounds: minHW = 12 [ 13 12 13 12 ] - // 7 rounds: minHW = 22 [ 22 23 22 23 ] - // 8 rounds: minHW = 31 [ 31 31 31 31 ] - // 9 rounds: minHW = 32 [ 32 32 32 32 ] - //10 rounds: minHW = 32 [ 32 32 32 32 ] - //11 rounds: minHW = 32 [ 32 32 32 32 ] */ - -}; - -enum r123_enum_threefry32x2 { - /* Output from skein_rot_search (srs32x2-X5000.out) - // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28 - // Start: Tue Jul 12 11:11:33 2011 - // rMin = 0.334. #0206[*07] [CRC=1D9765C0. hw_OR=32. cnt=16384. blkSize= 64].format */ - R_32x2_0_0=13, - R_32x2_1_0=15, - R_32x2_2_0=26, - R_32x2_3_0= 6, - R_32x2_4_0=17, - R_32x2_5_0=29, - R_32x2_6_0=16, - R_32x2_7_0=24 - - /* 4 rounds: minHW = 4 [ 4 4 4 4 ] - // 5 rounds: minHW = 6 [ 6 8 6 8 ] - // 6 rounds: minHW = 9 [ 9 12 9 12 ] - // 7 rounds: minHW = 16 [ 16 24 16 24 ] - // 8 rounds: minHW = 32 [ 32 32 32 32 ] - // 9 rounds: minHW = 32 [ 32 32 32 32 ] - //10 rounds: minHW = 32 [ 32 32 32 32 ] - //11 rounds: minHW = 32 [ 32 32 32 32 ] */ - }; - -enum r123_enum_threefry_wcnt { - WCNT2=2, - WCNT4=4 -}; - -#if R123_USE_64BIT -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint64_t RotL_64(uint64_t x, unsigned int N)); -R123_CUDA_DEVICE R123_STATIC_INLINE uint64_t RotL_64(uint64_t x, unsigned int N) -{ - return (x << (N & 63)) | (x >> ((64-N) & 63)); -} -#endif - -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint32_t RotL_32(uint32_t x, unsigned int N)); -R123_CUDA_DEVICE R123_STATIC_INLINE uint32_t RotL_32(uint32_t x, unsigned int N) -{ - return (x << (N & 31)) | (x >> ((32-N) & 31)); -} - -#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((uint64_t) (hi32)) << 32)) -#define SKEIN_KS_PARITY64 SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) -#define SKEIN_KS_PARITY32 0x1BD11BDA - -/** \endcond */ - -#ifndef THREEFRY2x32_DEFAULT_ROUNDS -#define THREEFRY2x32_DEFAULT_ROUNDS 20 -#endif - -#ifndef THREEFRY2x64_DEFAULT_ROUNDS -#define THREEFRY2x64_DEFAULT_ROUNDS 20 -#endif - -#ifndef THREEFRY4x32_DEFAULT_ROUNDS -#define THREEFRY4x32_DEFAULT_ROUNDS 20 -#endif - -#ifndef THREEFRY4x64_DEFAULT_ROUNDS -#define THREEFRY4x64_DEFAULT_ROUNDS 20 -#endif - -#define _threefry2x_tpl(W) \ -typedef struct r123array2x##W threefry2x##W##_ctr_t; \ -typedef struct r123array2x##W threefry2x##W##_key_t; \ -typedef struct r123array2x##W threefry2x##W##_ukey_t; \ -R123_CUDA_DEVICE R123_STATIC_INLINE threefry2x##W##_key_t threefry2x##W##keyinit(threefry2x##W##_ukey_t uk) { return uk; } \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE \ -threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \ - threefry2x##W##_ctr_t X; \ - uint##W##_t ks[2+1]; \ - int i; /* avoid size_t to avoid need for stddef.h */ \ - R123_ASSERT(Nrounds<=32); \ - ks[2] = SKEIN_KS_PARITY##W; \ - for (i=0;i < 2; i++) \ - { \ - ks[i] = k.v[i]; \ - X.v[i] = in.v[i]; \ - ks[2] ^= k.v[i]; \ - } \ - \ - /* Insert initial key before round 0 */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; \ - \ - if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>3){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; \ - X.v[1] += 1; /* X.v[2-1] += r */ \ - } \ - if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>7){ \ - /* InjectKey(r=2) */ \ - X.v[0] += ks[2]; X.v[1] += ks[0]; \ - X.v[1] += 2; \ - } \ - if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>11){ \ - /* InjectKey(r=3) */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; \ - X.v[1] += 3; \ - } \ - if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>15){ \ - /* InjectKey(r=4) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; \ - X.v[1] += 4; \ - } \ - if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>19){ \ - /* InjectKey(r=5) */ \ - X.v[0] += ks[2]; X.v[1] += ks[0]; \ - X.v[1] += 5; \ - } \ - if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>23){ \ - /* InjectKey(r=6) */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; \ - X.v[1] += 6; \ - } \ - if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>27){ \ - /* InjectKey(r=7) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; \ - X.v[1] += 7; \ - } \ - if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>31){ \ - /* InjectKey(r=8) */ \ - X.v[0] += ks[2]; X.v[1] += ks[0]; \ - X.v[1] += 8; \ - } \ - return X; \ -} \ - /** @ingroup ThreefryNxW */ \ -enum r123_enum_threefry2x##W { threefry2x##W##_rounds = THREEFRY2x##W##_DEFAULT_ROUNDS }; \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE \ -threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \ - return threefry2x##W##_R(threefry2x##W##_rounds, in, k); \ -} - - -#define _threefry4x_tpl(W) \ -typedef struct r123array4x##W threefry4x##W##_ctr_t; \ -typedef struct r123array4x##W threefry4x##W##_key_t; \ -typedef struct r123array4x##W threefry4x##W##_ukey_t; \ -R123_CUDA_DEVICE R123_STATIC_INLINE threefry4x##W##_key_t threefry4x##W##keyinit(threefry4x##W##_ukey_t uk) { return uk; } \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE \ -threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \ - threefry4x##W##_ctr_t X; \ - uint##W##_t ks[4+1]; \ - int i; /* avoid size_t to avoid need for stddef.h */ \ - R123_ASSERT(Nrounds<=72); \ - ks[4] = SKEIN_KS_PARITY##W; \ - for (i=0;i < 4; i++) \ - { \ - ks[i] = k.v[i]; \ - X.v[i] = in.v[i]; \ - ks[4] ^= k.v[i]; \ - } \ - \ - /* Insert initial key before round 0 */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ - \ - if(Nrounds>0){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>1){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>2){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>3){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>3){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ - X.v[4-1] += 1; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>4){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>5){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>6){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>7){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>7){ \ - /* InjectKey(r=2) */ \ - X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ - X.v[4-1] += 2; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>8){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>9){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>10){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>11){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>11){ \ - /* InjectKey(r=3) */ \ - X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ - X.v[4-1] += 3; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>12){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>13){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>14){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>15){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>15){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \ - X.v[4-1] += 4; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>16){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>17){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>18){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>19){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>19){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ - X.v[4-1] += 5; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>20){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>21){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>22){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>23){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>23){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ - X.v[4-1] += 6; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>24){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>25){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>26){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>27){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>27){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ - X.v[4-1] += 7; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>28){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>29){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>30){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>31){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>31){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ - X.v[4-1] += 8; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>32){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>33){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>34){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>35){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>35){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \ - X.v[4-1] += 9; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>36){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>37){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>38){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>39){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>39){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ - X.v[4-1] += 10; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>40){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>41){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>42){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>43){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>43){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ - X.v[4-1] += 11; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>44){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>45){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>46){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>47){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>47){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ - X.v[4-1] += 12; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>48){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>49){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>50){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>51){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>51){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ - X.v[4-1] += 13; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>52){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>53){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>54){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>55){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>55){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \ - X.v[4-1] += 14; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>56){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>57){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>58){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>59){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>59){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ - X.v[4-1] += 15; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>60){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>61){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>62){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>63){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>63){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ - X.v[4-1] += 16; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>64){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>65){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>66){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>67){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>67){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ - X.v[4-1] += 17; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>68){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>69){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>70){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>71){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>71){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ - X.v[4-1] += 18; /* X.v[WCNT4-1] += r */ \ - } \ - \ - return X; \ -} \ - \ - /** @ingroup ThreefryNxW */ \ -enum r123_enum_threefry4x##W { threefry4x##W##_rounds = THREEFRY4x##W##_DEFAULT_ROUNDS }; \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE \ -threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \ - return threefry4x##W##_R(threefry4x##W##_rounds, in, k); \ -} - -#if R123_USE_64BIT -_threefry2x_tpl(64) -_threefry4x_tpl(64) -#endif -_threefry2x_tpl(32) -_threefry4x_tpl(32) - -/* gcc4.5 and 4.6 seem to optimize a macro-ized threefryNxW better - than a static inline function. Why? */ -#define threefry2x32(c,k) threefry2x32_R(threefry2x32_rounds, c, k) -#define threefry4x32(c,k) threefry4x32_R(threefry4x32_rounds, c, k) -#define threefry2x64(c,k) threefry2x64_R(threefry2x64_rounds, c, k) -#define threefry4x64(c,k) threefry4x64_R(threefry4x64_rounds, c, k) - -#if defined(__cplusplus) -#define _threefryNxWclass_tpl(NxW) \ -namespace r123{ \ -template<unsigned int ROUNDS> \ - struct Threefry##NxW##_R{ \ - typedef threefry##NxW##_ctr_t ctr_type; \ - typedef threefry##NxW##_key_t key_type; \ - typedef threefry##NxW##_key_t ukey_type; \ - static const R123_METAL_CONSTANT_ADDRESS_SPACE unsigned int rounds=ROUNDS; \ - inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key)){ \ - R123_STATIC_ASSERT(ROUNDS<=72, "threefry is only unrolled up to 72 rounds\n"); \ - return threefry##NxW##_R(ROUNDS, ctr, key); \ - } \ -}; \ - typedef Threefry##NxW##_R<threefry##NxW##_rounds> Threefry##NxW; \ -} // namespace r123 - -_threefryNxWclass_tpl(2x32) -_threefryNxWclass_tpl(4x32) -#if R123_USE_64BIT -_threefryNxWclass_tpl(2x64) -_threefryNxWclass_tpl(4x64) -#endif - -/* The _tpl macros don't quite work to do string-pasting inside comments. - so we just write out the boilerplate documentation four times... */ - -/** -@defgroup ThreefryNxW Threefry Classes and Typedefs - -The ThreefryNxW classes export the member functions, typedefs and -operator overloads required by a @ref CBRNG "CBRNG" class. - -As described in -<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers: As Easy as 1, 2, 3</i> </a>, -the Threefry family is closely related to the Threefish block cipher from -<a href="http://www.skein-hash.info/"> Skein Hash Function</a>. -Threefry is \b not suitable for cryptographic use. - -Threefry uses integer addition, bitwise rotation, xor and permutation of words to randomize its output. - -@class r123::Threefry2x32_R -@ingroup ThreefryNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Threefry round -function will be applied. - -As of September 2011, the authors know of no statistical flaws with -ROUNDS=13 or more for Threefry2x32. - -@typedef r123::Threefry2x32 -@ingroup ThreefryNxW - Threefry2x32 is equivalent to Threefry2x32_R<20>. With 20 rounds, - Threefry2x32 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. - -@class r123::Threefry2x64_R -@ingroup ThreefryNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Threefry round -function will be applied. - -In November 2011, the authors discovered that 13 rounds of -Threefry2x64 sequenced by strided, interleaved key and counter -increments failed a very long (longer than the default BigCrush -length) WeightDistrub test. At the same time, it was confirmed that -14 rounds passes much longer tests (up to 5x10^12 samples) of a -similar nature. The authors know of no statistical flaws with -ROUNDS=14 or more for Threefry2x64. - -@typedef r123::Threefry2x64 -@ingroup ThreefryNxW - Threefry2x64 is equivalent to Threefry2x64_R<20>. With 20 rounds, - Threefry2x64 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. - - - -@class r123::Threefry4x32_R -@ingroup ThreefryNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Threefry round -function will be applied. - -As of September 2011, the authors know of no statistical flaws with -ROUNDS=12 or more for Threefry4x32. - -@typedef r123::Threefry4x32 -@ingroup ThreefryNxW - Threefry4x32 is equivalent to Threefry4x32_R<20>. With 20 rounds, - Threefry4x32 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. - - - -@class r123::Threefry4x64_R -@ingroup ThreefryNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Threefry round -function will be applied. - -As of September 2011, the authors know of no statistical flaws with -ROUNDS=12 or more for Threefry4x64. - -@typedef r123::Threefry4x64 -@ingroup ThreefryNxW - Threefry4x64 is equivalent to Threefry4x64_R<20>. With 20 rounds, - Threefry4x64 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. -*/ - -#endif - -#endif diff --git a/ext/random123/include/Random123/u01fixedpt.h b/ext/random123/include/Random123/u01fixedpt.h deleted file mode 100644 index 2058f8b5..00000000 --- a/ext/random123/include/Random123/u01fixedpt.h +++ /dev/null @@ -1,200 +0,0 @@ -/* -Copyright 2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef _random123_ufixed01_dot_h_ -#define _random123_ufixed01_dot_h_ - -#include "features/compilerfeatures.h" - -/** @defgroup u01fixedpt The u01fixedpt conversion functions - - These functions convert unsigned W-bit integers to uniformly - spaced real values (float or double) between 0.0 and 1.0 with - mantissas of M bits. - - PLEASE THINK CAREFULLY BEFORE USING THESE FUNCTIONS. THEY MAY - NOT BE WHAT YOU WANT. YOU MAY BE MUCH BETTER SERVED BY THE - FUNCTIONS IN ./uniform.hpp. - - These functions produce a finite number *uniformly spaced* values - in the range from 0.0 to 1.0 with uniform probability. The price - of uniform spacing is that they may not utilize the entire space - of possible outputs. E.g., u01fixedpt_closed_open_32_24 will never - produce a non-zero value less than 2^-24, even though such values - are representable in single-precision floating point. - - There are 12 functions, corresponding to the following choices: - - - W = 32 or 64 - - M = 24 (float) or 53 (double) - - open0 or closed0 : whether the output is open or closed at 0.0 - - open1 or closed1 : whether the output is open or closed at 1.0 - - The W=64 M=24 cases are not implemented. To obtain an M=24 float - from a uint64_t, use a cast (possibly with right-shift and bitwise - and) to convert some of the bits of the uint64_t to a uint32_t and - then use u01fixedpt_x_y_32_float. Note that the 64-bit random integers - produced by the Random123 library are random in "all the bits", so - with a little extra effort you can obtain two floats this way -- - one from the high bits and one from the low bits of the 64-bit - value. - - If the output is open at one end, then the extreme - value (0.0 or 1.0) will never be returned. Conversely, if the output - is closed at one end, then the extreme value is a possible - return value. - - The values returned are as follows. All values are returned - with equal frequency, except as noted in the closed_closed case: - - closed_open: Let P=min(M,W) - there are 2^P possible output values: - {0, 1, 2, ..., 2^P-1}/2^P - - open_closed: Let P=min(M,W) - there are 2^P possible values: - {1, 2, ..., 2^P}/2^P - - open_open: Let P=min(M, W+1) - there are 2^(P-1) possible values: - {1, 3, 5, ..., 2^P-1}/2^P - - closed_closed: Let P=min(M, W-1) - there are 1+2^P possible values: - {0, 1, 2, ... 2^P}/2^P - The extreme values (0.0 and 1.0) are - returned with half the frequency of - all others. - - On x86 hardware, especially on 32bit machines, the use of - internal 80bit x87-style floating point may result in - 'bonus' precision, which may cause closed intervals to not - be really closed, i.e. the conversions below might not - convert UINT{32,64}_MAX to 1.0. This sort of issue is - likely to occur when storing the output of a u01fixedpt_*_32_float - function in a double, though one can imagine getting extra - precision artifacts when going from 64_53 as well. Other - artifacts may exist on some GPU hardware. The tests in - kat_u01_main.h try to expose such issues, but caveat emptor. - - @cond HIDDEN_FROM_DOXYGEN - */ - -/* Hex floats were standardized by C in 1999, but weren't standardized - by C++ until 2011. So, we're obliged to write out our constants in - decimal, even though they're most naturally expressed in binary. - We cross our fingers and hope that the compiler does the compile-time - constant arithmetic properly. -*/ -#define R123_0x1p_31f (1.f/(1024.f*1024.f*1024.f*2.f)) -#define R123_0x1p_24f (128.f*R123_0x1p_31f) -#define R123_0x1p_23f (256.f*R123_0x1p_31f) -#define R123_0x1p_32 (1./(1024.*1024.*1024.*4.)) -#define R123_0x1p_63 (2.*R123_0x1p_32*R123_0x1p_32) -#define R123_0x1p_53 (1024.*R123_0x1p_63) -#define R123_0x1p_52 (2048.*R123_0x1p_63) - -/** @endcond */ - -#ifndef R123_USE_U01_DOUBLE -#define R123_USE_U01_DOUBLE 1 -#endif - -#ifdef __cplusplus -extern "C"{ -#endif - -/* narrowing conversions: uint32_t to float */ -R123_CUDA_DEVICE R123_STATIC_INLINE float u01fixedpt_closed_closed_32_float(uint32_t i){ - /* N.B. we ignore the high bit, so output is not monotonic */ - return ((i&0x7fffffc0) + (i&0x40))*R123_0x1p_31f; /* 0x1.p-31f */ -} - -R123_CUDA_DEVICE R123_STATIC_INLINE float u01fixedpt_closed_open_32_float(uint32_t i){ - return (i>>8)*R123_0x1p_24f; /* 0x1.0p-24f; */ -} - -R123_CUDA_DEVICE R123_STATIC_INLINE float u01fixedpt_open_closed_32_float(uint32_t i){ - return (1+(i>>8))*R123_0x1p_24f; /* *0x1.0p-24f; */ -} - -R123_CUDA_DEVICE R123_STATIC_INLINE float u01fixedpt_open_open_32_float(uint32_t i){ - return (0.5f+(i>>9))*R123_0x1p_23f; /* 0x1.p-23f; */ -} - -#if R123_USE_U01_DOUBLE -/* narrowing conversions: uint64_t to double */ -R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_closed_closed_64_double(uint64_t i){ - /* N.B. we ignore the high bit, so output is not monotonic */ - return ((i&R123_64BIT(0x7ffffffffffffe00)) + (i&0x200))*R123_0x1p_63; /* 0x1.p-63; */ -} - -R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_closed_open_64_double(uint64_t i){ - return (i>>11)*R123_0x1p_53; /* 0x1.0p-53; */ -} - -R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_open_closed_64_double(uint64_t i){ - return (1+(i>>11))*R123_0x1p_53; /* 0x1.0p-53; */ -} - -R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_open_open_64_double(uint64_t i){ - return (0.5+(i>>12))*R123_0x1p_52; /* 0x1.0p-52; */ -} - -/* widening conversions: u32 to double */ -R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_closed_closed_32_double(uint32_t i){ - /* j = i+(i&1) takes on 2^31+1 possible values with a 'trapezoid' distribution: - p_j = 1 0 2 0 2 .... 2 0 2 0 1 - j = 0 1 2 3 4 .... 2^32 - by converting to double *before* doing the add, we don't wrap the high bit. - */ - return (((double)(i&1)) + i)*R123_0x1p_32; /* 0x1.p-32; */ -} - -R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_closed_open_32_double(uint32_t i){ - return i*R123_0x1p_32; /* 0x1.p-32; */ -} - -R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_open_closed_32_double(uint32_t i){ - return (1.+i)*R123_0x1p_32; /* 0x1.p-32; */ -} - -R123_CUDA_DEVICE R123_STATIC_INLINE double u01fixedpt_open_open_32_double(uint32_t i){ - return (0.5+i)*R123_0x1p_32; /* 0x1.p-32; */ -} -#endif /* R123_USE_U01_DOUBLE */ - -#ifdef __cplusplus -} -#endif - -/** @} */ -#endif diff --git a/ext/random123/include/Random123/uniform.hpp b/ext/random123/include/Random123/uniform.hpp deleted file mode 100644 index 5b3f3f12..00000000 --- a/ext/random123/include/Random123/uniform.hpp +++ /dev/null @@ -1,310 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef __r123_uniform_dot_hpp -#define __r123_uniform_dot_hpp - -/** @defgroup uniform Uniform distribution scalar conversion functions - -This file provides some simple functions that can be used to convert -integers of various widths to floats and doubles with various -characteristics. It can be used to generate real-valued, uniformly -distributed random variables from the random integers produced by -the Random123 CBRNGs. - -There are three templated functions: - - - u01: output is as dense as possible in (0,1}, never 0.0. May - return 1.0 if and only if the number of output mantissa bits - is less than the width of the input. - - - uneg11: output is as dense as possible in {-1,1}, never 0.0. May - return 1.0 or -1.0 if and only if the number of output mantissa bits - is less than the width of the input. - - - u01fixedpt: output is "fixed point", equispaced, open at both ends, - and is never 0.0, 0.5 nor 1.0. - -The behavior of u01 and uneg11 depend on the pre-processor symbol: -R123_UNIFORM_FLOAT_STORE. When #defined to a non-zero value, u01 -and uneg11 declare a volatile intermediate result, with the -intention of forcing architectures that have "extra bits" in their -floating point registers to more closely conform to IEEE -arithmetic. When compiled this way, u01 and uneg11 will be -significantly slower, as they will incur a memory write and read on -every call. Without it, they may fail the "known answer test" -implemented in ut_uniform_IEEEkat.cpp even though they perform -perfectly reasonable int to float conversions. We have used -this option to get 32-bit x86 to produce the same results as -64-bit x86-64 code, but we do not recommend it for normal -use. - -Three additional functions are defined when C++11 or newer is in use: - - - u01all - - uneg11all - - u01fixedptall - -These functions apply the corresponding conversion to every -element of their argument, which must be a staticly sized -array, e.g., an r123array or a std::array of an integer type. - -This file may not be as portable, and has not been tested as -rigorously as other files in the library, e.g., the generators. -Nevertheless, we hope it is useful and we encourage developers to -copy it and modify it for their own use. We invite comments and -improvements. -*/ - -#include <Random123/features/compilerfeatures.h> -#include <limits> -#if R123_USE_CXX11_TYPE_TRAITS -#include <type_traits> -#endif -#if __cplusplus >= 201103L -#include <array> -#endif - -namespace r123{ -/** -@{ -@cond HIDDEN_FROM_DOXYGEN -*/ - -#if R123_USE_CXX11_TYPE_TRAITS -using std::make_signed; -using std::make_unsigned; -#else -// Sigh... We could try to find another <type_traits>, e.g., from -// boost or TR1. Or we can do it ourselves in the r123 namespace. -// It's not clear which will cause less headache... -template <typename T> struct make_signed{}; -template <typename T> struct make_unsigned{}; -#define R123_MK_SIGNED_UNSIGNED(ST, UT) \ -template<> struct make_signed<ST>{ typedef ST type; }; \ -template<> struct make_signed<UT>{ typedef ST type; }; \ -template<> struct make_unsigned<ST>{ typedef UT type; }; \ -template<> struct make_unsigned<UT>{ typedef UT type; } - -R123_MK_SIGNED_UNSIGNED(int8_t, uint8_t); -R123_MK_SIGNED_UNSIGNED(int16_t, uint16_t); -R123_MK_SIGNED_UNSIGNED(int32_t, uint32_t); -R123_MK_SIGNED_UNSIGNED(int64_t, uint64_t); -#if R123_USE_GNU_UINT128 -R123_MK_SIGNED_UNSIGNED(__int128_t, __uint128_t); -#endif -#undef R123_MK_SIGNED_UNSIGNED -#endif - -#if defined(_LIBCPP_HAS_NO_CONSTEXPR) -// Amazing! cuda thinks numeric_limits::max() is a __host__ function, so -// we can't use it in a device function. -// -// The LIBCPP_HAS_NO_CONSTEXP test catches situations where the libc++ -// library thinks that the compiler doesn't support constexpr, but we -// think it does. As a consequence, the library declares -// numeric_limits::max without constexpr. This workaround should only -// affect a narrow range of compiler/library pairings. -// -// In both cases, we find max() by computing ~(unsigned)0 right-shifted -// by is_signed. -template <typename T> -R123_CONSTEXPR R123_STATIC_INLINE R123_CUDA_DEVICE T maxTvalue(){ - typedef typename make_unsigned<T>::type uT; - return (~uT(0)) >> std::numeric_limits<T>::is_signed; - } -#else -template <typename T> -R123_CONSTEXPR R123_STATIC_INLINE T maxTvalue(){ - return std::numeric_limits<T>::max(); -} -#endif -/** @endcond - @} - */ - -//! Return a uniform real value in (0, 1] -/** - @ingroup uniform - Input is a W-bit integer (signed or unsigned). It is cast to - a W-bit unsigned integer, multiplied by Ftype(2^-W) and added to - Ftype(2^(-W-1)). A good compiler should optimize it down to an - int-to-float conversion followed by a multiply and an add, which - might be fused, depending on the architecture. - - If the input is a uniformly distributed integer, and if Ftype - arithmetic follows IEEE754 round-to-nearest rules, then the - result is a uniformly distributed floating point number in (0, 1]. - -- The result is never exactly 0.0. -- The smallest value returned is 2^-(W-1). -- Let M be the number of mantissa bits in Ftype (typically 24 or 53). - - If W>M then the largest value retured is 1.0. - - If W<=M then the largest value returned is Ftype(1.0 - 2^(-W-1)). -*/ -template <typename Ftype, typename Itype> -R123_CUDA_DEVICE R123_STATIC_INLINE Ftype u01(Itype in){ - typedef typename make_unsigned<Itype>::type Utype; - R123_CONSTEXPR Ftype factor = Ftype(1.)/(maxTvalue<Utype>() + Ftype(1.)); - R123_CONSTEXPR Ftype halffactor = Ftype(0.5)*factor; -#if R123_UNIFORM_FLOAT_STORE - volatile Ftype x = Utype(in)*factor; return x+halffactor; -#else - return Utype(in)*factor + halffactor; -#endif -} - -//! Return a signed value in [-1,1] -/** - @ingroup uniform - The argument is converted to a W-bit signed integer, multiplied by Ftype(2^-(W-1)) and - then added to Ftype(2^-W). A good compiler should optimize - it down to an int-to-float conversion followed by a multiply and - an add, which might be fused, depending on the architecture. - - If the input is a uniformly distributed integer, and if Ftype - arithmetic follows IEEE754 round-to-nearest rules, then the - output is a uniformly distributed floating point number in [-1, 1]. - -- The result is never exactly 0.0. -- The smallest absolute value returned is 2^-W -- Let M be the number of mantissa bits in Ftype. - - If W>M then the largest value retured is 1.0 and the smallest is -1.0. - - If W<=M then the largest value returned is the Ftype(1.0 - 2^-W) - and the smallest value returned is -Ftype(1.0 - 2^-W). -*/ -template <typename Ftype, typename Itype> -R123_CUDA_DEVICE R123_STATIC_INLINE Ftype uneg11(Itype in){ - typedef typename make_signed<Itype>::type Stype; - R123_CONSTEXPR Ftype factor = Ftype(1.)/(maxTvalue<Stype>() + Ftype(1.)); - R123_CONSTEXPR Ftype halffactor = Ftype(0.5)*factor; -#if R123_UNIFORM_FLOAT_STORE - volatile Ftype x = Stype(in)*factor; return x+halffactor; -#else - return Stype(in)*factor + halffactor; -#endif -} - -//! Return a value in (0,1) chosen from a set of equally spaced fixed-point values -/** - @ingroup uniform - Let: - - W = width of Itype, e.g., 32 or 64, regardless of signedness. - - M = mantissa bits of Ftype, e.g., 24, 53 or 64 - - B = min(M, W) - - Then the 2^(B-1) possible output values are: 2^-B*{1, 3, 5, ..., 2^B - 1} - - The smallest output is: 2^-B - - The largest output is: 1 - 2^-B - - The output is never exactly 0.0, nor 0.5, nor 1.0. - - The 2^(B-1) possible outputs: - - are equally likely, - - are uniformly spaced by 2^-(B-1), - - are balanced around 0.5 -*/ -template <typename Ftype, typename Itype> -R123_CUDA_DEVICE R123_STATIC_INLINE Ftype u01fixedpt(Itype in){ - typedef typename make_unsigned<Itype>::type Utype; - R123_CONSTEXPR int excess = std::numeric_limits<Utype>::digits - std::numeric_limits<Ftype>::digits; - if(excess>=0){ - R123_CONSTEXPR int ex_nowarn = (excess>=0) ? excess : 0; - R123_CONSTEXPR Ftype factor = Ftype(1.)/(Ftype(1.) + ((maxTvalue<Utype>()>>ex_nowarn))); - return (1 | (Utype(in)>>ex_nowarn)) * factor; - }else - return u01<Ftype>(in); -} - -#if R123_USE_CXX11_STD_ARRAY - -//! Apply u01 to every item in an r123array, returning a std::array -/** @ingroup uniform - * Only in C++11 and newer. - * The argument type may be any integer collection with a constexpr static_size member, - * e.g., an r123array or a std::array of an integer type. - */ -template <typename Ftype, typename CollType> -static inline -std::array<Ftype, CollType::static_size> u01all(CollType in) -{ - std::array<Ftype, CollType::static_size> ret; - size_t i=0; - for(auto e : in){ - ret[i++] = u01<Ftype>(e); - } - return ret; -} - -//! Apply uneg11 to every item in an r123array, returning a std::array -/** @ingroup uniform - * Only in C++11 and newer. - * The argument type may be any integer collection with a constexpr static_size member, - * e.g., an r123array or a std::array of an integer type. - */ -template <typename Ftype, typename CollType> -static inline -std::array<Ftype, CollType::static_size> uneg11all(CollType in) -{ - std::array<Ftype, CollType::static_size> ret; - size_t i=0; - for(auto e : in){ - ret[i++] = uneg11<Ftype>(e); - } - return ret; -} - -//! Apply u01fixedpt to every item in an r123array, returning a std::array -/** @ingroup uniform - * Only in C++11 and newer. - * The argument type may be any integer collection with a constexpr static_size member, - * e.g., an r123array or a std::array of an integer type. -*/ -template <typename Ftype, typename CollType> -static inline -std::array<Ftype, CollType::static_size> u01fixedptall(CollType in) -{ - std::array<Ftype, CollType::static_size> ret; - size_t i=0; - for(auto e : in){ - ret[i++] = u01fixedpt<Ftype>(e); - } - return ret; -} -#endif // __cplusplus >= 201103L - -} // namespace r123 - -#endif - diff --git a/spack/package.py b/spack/package.py index 51877c76..3a04b47a 100644 --- a/spack/package.py +++ b/spack/package.py @@ -39,6 +39,7 @@ class Arbor(CMakePackage, CudaPackage): # misc dependencies depends_on('fmt@7.1:', when='@0.5.3:') # required by the modcc compiler depends_on('nlohmann-json') + depends_on('random123') depends_on('cuda@10:', when='+cuda') depends_on('libxml2', when='+neuroml') -- GitLab