From 767424af18a55604496f38dd4593542db97240a1 Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Thu, 9 Oct 2014 15:36:23 -0700
Subject: Improved the functors defined for standard reductions

Added a functor to encapsulate the generation of random numbers on cpu and gpu.
---
 .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 72 ++++++++++++++++++++--
 1 file changed, 68 insertions(+), 4 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index 92984336c..e9aa22183 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -25,12 +25,12 @@ template <typename T> struct SumReducer
   }
 
  private:
-  T m_sum;
+  typename internal::remove_all<T>::type m_sum;
 };
 
 template <typename T> struct MaxReducer
 {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaxReducer() : m_max((std::numeric_limits<T>::min)()) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaxReducer() : m_max(-(std::numeric_limits<T>::max)()) { }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
     if (t > m_max) { m_max = t; }
   }
@@ -39,7 +39,7 @@ template <typename T> struct MaxReducer
   }
 
  private:
-  T m_max;
+  typename internal::remove_all<T>::type m_max;
 };
 
 template <typename T> struct MinReducer
@@ -53,9 +53,73 @@ template <typename T> struct MinReducer
   }
 
  private:
-  T m_min;
+  typename internal::remove_all<T>::type m_min;
 };
 
+
+#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)
+// We're not compiling a cuda kernel
+template <typename T> struct UniformRandomGenerator {
+  template<typename Index>
+  T operator()(Index, Index = 0) const {
+    return random<T>();
+  }
+  template<typename Index>
+  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
+    const int packetSize = internal::packet_traits<T>::size;
+    EIGEN_ALIGN_DEFAULT T values[packetSize];
+    for (int i = 0; i < packetSize; ++i) {
+      values[i] = random<T>();
+    }
+    return internal::pload<typename internal::packet_traits<T>::type>(values);
+  }
+};
+
+#else
+
+// We're compiling a cuda kernel
+template <typename T> struct UniformRandomGenerator;
+
+template <> struct UniformRandomGenerator<float> {
+  UniformRandomGenerator() {
+    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+    curand_init(0, tid, 0, &m_state);
+  }
+
+  template<typename Index>
+  float operator()(Index, Index = 0) const {
+    return curand_uniform(&m_state);
+  }
+  template<typename Index>
+  float4 packetOp(Index, Index = 0) const {
+    return curand_uniform4(&m_state);
+  }
+
+ private:
+  mutable curandStatePhilox4_32_10_t m_state;
+};
+
+template <> struct UniformRandomGenerator<double> {
+  UniformRandomGenerator() {
+    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+    curand_init(0, tid, 0, &m_state);
+  }
+  template<typename Index>
+  double operator()(Index, Index = 0) const {
+    return curand_uniform_double(&m_state);
+  }
+  template<typename Index>
+  double2 packetOp(Index, Index = 0) const {
+    return curand_uniform2_double(&m_state);
+  }
+
+ private:
+  mutable curandStatePhilox4_32_10_t m_state;
+};
+
+#endif
+
+
 } // end namespace internal
 } // end namespace Eigen
 
-- 
cgit v1.2.3