From ac2e6e0d03c6027d9a1bbef356c2e149d8a9205a Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Fri, 26 Feb 2016 13:52:24 -0800
Subject: Properly vectorized the random number generators

---
 .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 101 ++++++++++++---------
 1 file changed, 60 insertions(+), 41 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index 7796e1a88..528909688 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -342,17 +342,17 @@ template <typename T> class UniformRandomGenerator {
   }
 
   template<typename Index>
-  T operator()(Index, Index = 0) const {
+  T operator()(Index) const {
     return random<T>();
   }
-  template<typename Index>
-  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
-    const int packetSize = internal::packet_traits<T>::size;
+  template<typename PacketType, typename Index>
+  PacketType packetOp(Index) const {
+    const int packetSize = internal::unpacket_traits<PacketType>::size;
     EIGEN_ALIGN_MAX T values[packetSize];
     for (int i = 0; i < packetSize; ++i) {
       values[i] = random<T>();
     }
-    return internal::pload<typename internal::packet_traits<T>::type>(values);
+    return internal::pload<PacketType>(values);
   }
 
  private:
@@ -370,22 +370,22 @@ template <> class UniformRandomGenerator<float> {
     }
   }
   UniformRandomGenerator(const UniformRandomGenerator& other) {
-    m_generator.seed(other(0, 0) * UINT_MAX);
+    m_generator.seed(other(0) * UINT_MAX);
     m_deterministic = other.m_deterministic;
   }
 
   template<typename Index>
-  float operator()(Index, Index = 0) const {
+  float operator()(Index) const {
     return m_distribution(m_generator);
   }
-  template<typename Index>
-  typename internal::packet_traits<float>::type packetOp(Index i, Index j = 0) const {
-    const int packetSize = internal::packet_traits<float>::size;
+  template<typename PacketType, typename Index>
+  PacketType packetOp(Index i) const {
+    const int packetSize = internal::unpacket_traits<PacketType>::size;
     EIGEN_ALIGN_MAX float values[packetSize];
     for (int k = 0; k < packetSize; ++k) {
-      values[k] = this->operator()(i, j);
+      values[k] = this->operator()(i);
     }
-    return internal::pload<typename internal::packet_traits<float>::type>(values);
+    return internal::pload<PacketType>(values);
   }
 
  private:
@@ -407,22 +407,22 @@ template <> class UniformRandomGenerator<double> {
     }
   }
   UniformRandomGenerator(const UniformRandomGenerator& other) {
-    m_generator.seed(other(0, 0) * UINT_MAX);
+    m_generator.seed(other(0) * UINT_MAX);
     m_deterministic = other.m_deterministic;
   }
 
   template<typename Index>
-  double operator()(Index, Index = 0) const {
+  double operator()(Index) const {
     return m_distribution(m_generator);
   }
-  template<typename Index>
-  typename internal::packet_traits<double>::type packetOp(Index i, Index j = 0) const {
-    const int packetSize = internal::packet_traits<double>::size;
+  template<typename PacketType, typename Index>
+  PacketType packetOp(Index i) const {
+    const int packetSize = internal::unpacket_traits<PacketType>::size;
     EIGEN_ALIGN_MAX double values[packetSize];
     for (int k = 0; k < packetSize; ++k) {
-      values[k] = this->operator()(i, j);
+      values[k] = this->operator()(i);
     }
-    return internal::pload<typename internal::packet_traits<double>::type>(values);
+    return internal::pload<PacketType>(values);
   }
 
  private:
@@ -458,11 +458,12 @@ template <> class UniformRandomGenerator<float> {
   }
 
   template<typename Index>
-  __device__ float operator()(Index, Index = 0) const {
+  __device__ float operator()(Index) const {
     return curand_uniform(&m_state);
   }
-  template<typename Index>
-  __device__ float4 packetOp(Index, Index = 0) const {
+  template<typename PacketType, typename Index>
+  __device__ float4 packetOp(Index) const {
+    EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
     return curand_uniform4(&m_state);
   }
 
@@ -487,11 +488,12 @@ template <> class UniformRandomGenerator<double> {
     curand_init(seed, tid, 0, &m_state);
   }
   template<typename Index>
-  __device__ double operator()(Index, Index = 0) const {
+  __device__ double operator()(Index) const {
     return curand_uniform_double(&m_state);
   }
-  template<typename Index>
-  __device__ double2 packetOp(Index, Index = 0) const {
+  template<typename PacketType, typename Index>
+  __device__ double2 packetOp(Index) const {
+    EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
     return curand_uniform2_double(&m_state);
   }
 
@@ -516,7 +518,7 @@ template <> class UniformRandomGenerator<std::complex<float> > {
     curand_init(seed, tid, 0, &m_state);
   }
   template<typename Index>
-  __device__ std::complex<float> operator()(Index, Index = 0) const {
+  __device__ std::complex<float> operator()(Index) const {
     float4 vals = curand_uniform4(&m_state);
     return std::complex<float>(vals.x, vals.y);
   }
@@ -542,7 +544,7 @@ template <> class UniformRandomGenerator<std::complex<double> > {
     curand_init(seed, tid, 0, &m_state);
   }
   template<typename Index>
-  __device__ std::complex<double> operator()(Index, Index = 0) const {
+  __device__ std::complex<double> operator()(Index) const {
     double2 vals = curand_uniform2_double(&m_state);
     return std::complex<double>(vals.x, vals.y);
   }
@@ -554,6 +556,14 @@ template <> class UniformRandomGenerator<std::complex<double> > {
 
 #endif
 
+template <typename Scalar>
+struct functor_traits<UniformRandomGenerator<Scalar> > {
+  enum {
+    PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess
+  };
+};
+
+
 
 #if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && __cplusplus > 199711
 // We're not compiling a cuda kernel
@@ -568,21 +578,21 @@ template <typename T> class NormalRandomGenerator {
   }
   NormalRandomGenerator(const NormalRandomGenerator& other)
       : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution) {
-    m_generator.seed(other(0, 0) * UINT_MAX);
+    m_generator.seed(other(0) * UINT_MAX);
   }
 
   template<typename Index>
-  T operator()(Index, Index = 0) const {
+  T operator()(Index) const {
     return m_distribution(m_generator);
   }
-  template<typename Index>
-  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
-    const int packetSize = internal::packet_traits<T>::size;
+  template<typename PacketType, typename Index>
+  PacketType packetOp(Index) const {
+    const int packetSize = internal::unpacket_traits<PacketType>::size;
     EIGEN_ALIGN_MAX T values[packetSize];
     for (int i = 0; i < packetSize; ++i) {
       values[i] = m_distribution(m_generator);
     }
-    return internal::pload<typename internal::packet_traits<T>::type>(values);
+    return internal::pload<PacketType>(values);
   }
 
  private:
@@ -612,11 +622,12 @@ template <> class NormalRandomGenerator<float> {
     curand_init(seed, tid, 0, &m_state);
   }
   template<typename Index>
-  __device__ float operator()(Index, Index = 0) const {
+  __device__ float operator()(Index) const {
     return curand_normal(&m_state);
   }
-  template<typename Index>
-  __device__ float4 packetOp(Index, Index = 0) const {
+  template<typename PacketType, typename Index>
+  __device__ float4 packetOp(Index) const {
+    EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
     return curand_normal4(&m_state);
   }
 
@@ -641,11 +652,12 @@ template <> class NormalRandomGenerator<double> {
     curand_init(seed, tid, 0, &m_state);
   }
   template<typename Index>
-  __device__ double operator()(Index, Index = 0) const {
+  __device__ double operator()(Index) const {
     return curand_normal_double(&m_state);
   }
-  template<typename Index>
-  __device__ double2 packetOp(Index, Index = 0) const {
+  template<typename PacketType, typename Index>
+  __device__ double2 packetOp(Index) const {
+    EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
     return curand_normal2_double(&m_state);
   }
 
@@ -670,7 +682,7 @@ template <> class NormalRandomGenerator<std::complex<float> > {
     curand_init(seed, tid, 0, &m_state);
   }
   template<typename Index>
-  __device__ std::complex<float> operator()(Index, Index = 0) const {
+  __device__ std::complex<float> operator()(Index) const {
     float4 vals = curand_normal4(&m_state);
     return std::complex<float>(vals.x, vals.y);
   }
@@ -696,7 +708,7 @@ template <> class NormalRandomGenerator<std::complex<double> > {
     curand_init(seed, tid, 0, &m_state);
   }
   template<typename Index>
-  __device__ std::complex<double> operator()(Index, Index = 0) const {
+  __device__ std::complex<double> operator()(Index) const {
     double2 vals = curand_normal2_double(&m_state);
     return std::complex<double>(vals.x, vals.y);
   }
@@ -718,6 +730,13 @@ template <typename T> class NormalRandomGenerator {
 
 #endif
 
+template <typename Scalar>
+struct functor_traits<NormalRandomGenerator<Scalar> > {
+  enum {
+    PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess
+  };
+};
+
 
 template <typename T, typename Index, size_t NumDims>
 class GaussianGenerator {
-- 
cgit v1.2.3