From 2f6d1607c84bd828e77a44465e0dccfd3524d7a6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 4 Oct 2016 08:38:23 -0700 Subject: Cleaned up the random number generation code. --- unsupported/Eigen/CXX11/Tensor | 4 +- .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 442 --------------------- unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h | 276 +++++++++++++ 3 files changed, 277 insertions(+), 445 deletions(-) create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h (limited to 'unsupported/Eigen/CXX11') diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 1d9f89864..4976a1254 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -61,9 +61,6 @@ typedef unsigned __int64 uint64_t; #ifdef EIGEN_USE_GPU #include #include -#if defined(__CUDACC__) -#include -#endif #if __cplusplus >= 201103L #include #include @@ -83,6 +80,7 @@ typedef unsigned __int64 uint64_t; #include "src/Tensor/TensorDimensions.h" #include "src/Tensor/TensorInitializer.h" #include "src/Tensor/TensorTraits.h" +#include "src/Tensor/TensorRandom.h" #include "src/Tensor/TensorUInt128.h" #include "src/Tensor/TensorIntDiv.h" #include "src/Tensor/TensorGlobalFunctions.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index eddb86597..7164e8d60 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -441,448 +441,6 @@ struct reducer_traits, Device> { }; -// Random number generation -namespace { -#ifdef __CUDA_ARCH__ -__device__ int get_random_seed() { - return clock(); -} -#else -static inline int get_random_seed() { -#ifdef _WIN32 - SYSTEMTIME st; - GetSystemTime(&st); - return st.wSecond + 1000 * st.wMilliseconds; -#elif defined __APPLE__ - return static_cast(mach_absolute_time()); -#else - timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return static_cast(ts.tv_nsec); -#endif -} -#endif -} - -#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__) -// We're not compiling a cuda kernel -template class UniformRandomGenerator { - - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - if (!deterministic) { - srand(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - } - - T operator()() const { - return random(); - } - template - PacketType packetOp() const { - const int packetSize = internal::unpacket_traits::size; - EIGEN_ALIGN_MAX T values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = random(); - } - return internal::pload(values); - } - - private: - bool m_deterministic; -}; - -#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 -template <> class UniformRandomGenerator { - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator& other) { - m_generator = new std::mt19937(); - m_generator->seed(other() * UINT_MAX); - m_deterministic = other.m_deterministic; - } - ~UniformRandomGenerator() { - delete m_generator; - } - - float operator()() const { - return m_distribution(*m_generator); - } - template - PacketType packetOp() const { - const int packetSize = internal::unpacket_traits::size; - EIGEN_ALIGN_MAX float values[packetSize]; - for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(); - } - return internal::pload(values); - } - - private: - UniformRandomGenerator& operator = (const UniformRandomGenerator&); - // Make sure m_deterministic comes first to match the layout of the cpu - // version of the code. - bool m_deterministic; - std::mt19937* m_generator; - mutable std::uniform_real_distribution m_distribution; -}; - -template <> class UniformRandomGenerator { - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator& other) { - m_generator = new std::mt19937(); - m_generator->seed(other() * UINT_MAX); - m_deterministic = other.m_deterministic; - } - ~UniformRandomGenerator() { - delete m_generator; - } - - double operator()() const { - return m_distribution(*m_generator); - } - template - PacketType packetOp() const { - const int packetSize = internal::unpacket_traits::size; - EIGEN_ALIGN_MAX double values[packetSize]; - for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(); - } - return internal::pload(values); - } - - private: - UniformRandomGenerator& operator = (const UniformRandomGenerator&); - // Make sure m_deterministic comes first to match the layout of the cpu - // version of the code. - bool m_deterministic; - std::mt19937* m_generator; - mutable std::uniform_real_distribution m_distribution; -}; -#endif - -#else - -// We're compiling a cuda kernel -template class UniformRandomGenerator; - -template <> class UniformRandomGenerator { - public: - static const bool PacketAccess = true; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - - __device__ float operator()() const { - return curand_uniform(&m_state); - } - template - __device__ float4 packetOp() const { - EIGEN_STATIC_ASSERT((is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_uniform4(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator { - public: - static const bool PacketAccess = true; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ double operator()() const { - return curand_uniform_double(&m_state); - } - template - __device__ double2 packetOp() const { - EIGEN_STATIC_ASSERT((is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_uniform2_double(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator > { - public: - static const bool PacketAccess = false; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ std::complex operator()() const { - float4 vals = curand_uniform4(&m_state); - return std::complex(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator > { - public: - static const bool PacketAccess = false; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ std::complex operator()() const { - double2 vals = curand_uniform2_double(&m_state); - return std::complex(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -#endif - -template -struct functor_traits > { - enum { - // Rough estimate. - Cost = 100 * NumTraits::MulCost, - PacketAccess = UniformRandomGenerator::PacketAccess - }; -}; - - - -#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && (__cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900) -// We're not compiling a cuda kernel -template class NormalRandomGenerator { - public: - static const bool PacketAccess = true; - - NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - NormalRandomGenerator(const NormalRandomGenerator& other) - : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) { - m_generator->seed(other() * UINT_MAX); - } - ~NormalRandomGenerator() { - delete m_generator; - } - T operator()() const { - return m_distribution(*m_generator); - } - template - PacketType packetOp() const { - const int packetSize = internal::unpacket_traits::size; - EIGEN_ALIGN_MAX T values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = m_distribution(*m_generator); - } - return internal::pload(values); - } - - private: - // No assignment - NormalRandomGenerator& operator = (const NormalRandomGenerator&); - - bool m_deterministic; - mutable std::normal_distribution m_distribution; - std::mt19937* m_generator; -}; - -#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__) - -// We're compiling a cuda kernel -template class NormalRandomGenerator; - -template <> class NormalRandomGenerator { - public: - static const bool PacketAccess = true; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ float operator()() const { - return curand_normal(&m_state); - } - template - __device__ float4 packetOp() const { - EIGEN_STATIC_ASSERT((is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_normal4(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator { - public: - static const bool PacketAccess = true; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ double operator()() const { - return curand_normal_double(&m_state); - } - template - __device__ double2 packetOp() const { - EIGEN_STATIC_ASSERT((is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_normal2_double(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator > { - public: - static const bool PacketAccess = false; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ std::complex operator()() const { - float4 vals = curand_normal4(&m_state); - return std::complex(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator > { - public: - static const bool PacketAccess = false; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ std::complex operator()() const { - double2 vals = curand_normal2_double(&m_state); - return std::complex(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -#else - -template class NormalRandomGenerator { - public: - static const bool PacketAccess = false; - NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {} - - private: - bool m_deterministic; -}; - -#endif - -template -struct functor_traits > { - enum { - // Rough estimate. - Cost = 100 * NumTraits::MulCost, - PacketAccess = NormalRandomGenerator::PacketAccess - }; -}; - - template class GaussianGenerator { public: diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h new file mode 100644 index 000000000..9b16e68f5 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h @@ -0,0 +1,276 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H +#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H + +namespace Eigen { +namespace internal { + +namespace { + +EIGEN_DEVICE_FUNC uint64_t get_random_seed() { +#ifdef __CUDA_ARCH__ + // We don't support 3d kernels since we currently only use 1 and + // 2d kernels. + assert(threadIdx.z == 0); + return clock64() + + blockIdx.x * blockDim.x + threadIdx.x + + gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); + +#elif defined _WIN32 + // Use the current time as a baseline. + GetSystemTime(&st); + int time = st.wSecond + 1000 * st.wMilliseconds; + // Mix in a random number to make sure that we get different seeds if + // we try to generate seeds faster than the clock resolution. + // We need 2 random values since the generator only generate 16 bits at + // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx) + SYSTEMTIME st; + uint rnd1 = ::rand(); + uint rnd2 = ::rand(); + uint64_t rnd = (rnd1 | rnd2 << 16) ^ time; + return rnd; + +#elif defined __APPLE__ + // Same approach as for win32, except that the random number generator + // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random). + uint64_t rnd = ::random() ^ mach_absolute_time(); + return rnd; + +#else + // Augment the current time with pseudo random number generation + // to ensure that we get different seeds if we try to generate seeds + // faster than the clock resolution. + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + uint64_t rnd = ::random() ^ ts.tv_nsec; + return rnd; +#endif +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) { + // TODO: Unify with the implementation in the non blocking thread pool. + uint64_t current = *state; + // Update the internal state + *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; + // Generate the random output (using the PCG-XSH-RS scheme) + return (current ^ (current >> 22)) >> (22 + (current >> 61)); +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) { + seed = seed ? seed : get_random_seed(); + return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; +} + +} // namespace + + +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeUniform(uint64_t* state) { + unsigned rnd = PCG_XSH_RS_generator(state); + return static_cast(rnd); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Eigen::half RandomToTypeUniform(uint64_t* state) { + Eigen::half result; + // Generate 10 random bits for the mantissa + unsigned rnd = PCG_XSH_RS_generator(state); + result.x = static_cast(rnd & 0x3ffu); + // Set the exponent + result.x |= (static_cast(15) << 10); + // Return the final result + return result - Eigen::half(1.0f); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float RandomToTypeUniform(uint64_t* state) { + typedef union { + uint32_t raw; + float fp; + } internal; + internal result; + // Generate 23 random bits for the mantissa mantissa + const unsigned rnd = PCG_XSH_RS_generator(state); + result.raw = rnd & 0x7fffffu; + // Set the exponent + result.raw |= (static_cast(127) << 23); + // Return the final result + return result.fp - 1.0f; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double RandomToTypeUniform(uint64_t* state) { + typedef union { + uint64_t raw; + double dp; + } internal; + internal result; + result.raw = 0; + // Generate 52 random bits for the mantissa + // First generate the upper 20 bits + unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu; + // The generate the lower 32 bits + unsigned rnd2 = PCG_XSH_RS_generator(state); + result.raw = (static_cast(rnd1) << 32) | rnd2; + // Set the exponent + result.raw |= (static_cast(1023) << 52); + // Return the final result + return result.dp - 1.0; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeUniform >(uint64_t* state) { + return std::complex(RandomToTypeUniform(state), + RandomToTypeUniform(state)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeUniform >(uint64_t* state) { + return std::complex(RandomToTypeUniform(state), + RandomToTypeUniform(state)); +} + +template class UniformRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + const UniformRandomGenerator& other) { + m_state = other.m_state; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + uint64_t local_state = m_state + i; + T result = RandomToTypeUniform(&local_state); + m_state = local_state; + return result; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX T values[packetSize]; + uint64_t local_state = m_state + i; + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeUniform(&local_state); + } + m_state = local_state; + return internal::pload(values); + } + + private: + mutable uint64_t m_state; +}; + +template +struct functor_traits > { + enum { + // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)). + Cost = 12 * NumTraits::AddCost * + ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)), + PacketAccess = UniformRandomGenerator::PacketAccess + }; +}; + + + +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeNormal(uint64_t* state) { + // Use the ratio of uniform method to generate numbers following a normal + // distribution. See for example Numerical Recipes chapter 7.3.9 for the + // details. + T u, v, q; + do { + u = RandomToTypeUniform(state); + v = T(1.7156) * (RandomToTypeUniform(state) - T(0.5)); + const T x = u - T(0.449871); + const T y = numext::abs(v) + T(0.386595); + q = x*x + y * (T(0.196)*y - T(0.25472)*x); + } while (q > T(0.27597) && + (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)); + + return v/u; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeNormal >(uint64_t* state) { + return std::complex(RandomToTypeNormal(state), + RandomToTypeNormal(state)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeNormal >(uint64_t* state) { + return std::complex(RandomToTypeNormal(state), + RandomToTypeNormal(state)); +} + + +template class NormalRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( + const NormalRandomGenerator& other) { + m_state = other.m_state; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + uint64_t local_state = m_state + i; + T result = RandomToTypeNormal(&local_state); + m_state = local_state; + return result; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX T values[packetSize]; + uint64_t local_state = m_state + i; + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeNormal(&local_state); + } + m_state = local_state; + return internal::pload(values); + } + + private: + mutable uint64_t m_state; +}; + + +template +struct functor_traits > { + enum { + // On average, we need to generate about 3 random numbers + // 15 mul, 8 add, 1.5 logs + Cost = 3 * functor_traits >::Cost + + 15 * NumTraits::AddCost + 8 * NumTraits::AddCost + + 3 * functor_traits >::Cost / 2, + PacketAccess = NormalRandomGenerator::PacketAccess + }; +}; + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H -- cgit v1.2.3