path: root/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
author    Benoit Steiner <benoit.steiner.goog@gmail.com> 2014-10-09 15:36:23 -0700
committer Benoit Steiner <benoit.steiner.goog@gmail.com> 2014-10-09 15:36:23 -0700
commit    767424af18a55604496f38dd4593542db97240a1 (patch)
tree      6500b0021fcc7abdca7093d7e75d0b39e2445c2b /unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
parent    44beee9d68e13dc299c6e2ea321aedc74c23d039 (diff)
Improved the functors defined for standard reductions
Added a functor to encapsulate the generation of random numbers on cpu and gpu.
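
The reducers' accumulator members switch from plain T to typename internal::remove_all<T>::type. Below is a minimal standalone sketch of what remove_all strips; the motivation (keeping the stored accumulator an assignable value type even when T is deduced with const or reference qualifiers) is my reading, not spelled out in the commit message.

// Standalone sketch, not part of the patch: internal::remove_all strips
// const, reference and pointer qualifiers from the type it is given.
#include <Eigen/Core>
#include <type_traits>

int main() {
  typedef Eigen::internal::remove_all<const float&>::type Stripped;
  static_assert(std::is_same<Stripped, float>::value,
                "remove_all<const float&>::type is plain float");
  return 0;
}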
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h  72
1 files changed, 68 insertions, 4 deletions
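
The first MaxReducer hunk below replaces the (std::numeric_limits<T>::min)() seed with -(std::numeric_limits<T>::max)(). For floating-point types, min() is the smallest positive normalized value, not the most negative one, so a max reduction over all-negative input would return a value that is not in the input. A standalone sketch, independent of the patch, illustrating the difference:

// Standalone illustration of the seed change; reduce_max mimics the
// reducer's reduce() loop and is not Eigen code.
#include <iostream>
#include <limits>

template <typename T>
T reduce_max(const T* data, int n, T seed) {
  T m = seed;
  for (int i = 0; i < n; ++i) {
    if (data[i] > m) m = data[i];
  }
  return m;
}

int main() {
  const float values[] = {-3.0f, -1.5f, -7.2f};
  // Old seed: prints ~1.17549e-38, which is not an element of the input.
  std::cout << reduce_max(values, 3, (std::numeric_limits<float>::min)()) << "\n";
  // New seed: prints -1.5, the correct maximum.
  std::cout << reduce_max(values, 3, -(std::numeric_limits<float>::max)()) << "\n";
  return 0;
}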
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index 92984336c..e9aa22183 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -25,12 +25,12 @@ template <typename T> struct SumReducer
}
private:
- T m_sum;
+ typename internal::remove_all<T>::type m_sum;
};
template <typename T> struct MaxReducer
{
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaxReducer() : m_max((std::numeric_limits<T>::min)()) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaxReducer() : m_max(-(std::numeric_limits<T>::max)()) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
if (t > m_max) { m_max = t; }
}
@@ -39,7 +39,7 @@ template <typename T> struct MaxReducer
}
private:
- T m_max;
+ typename internal::remove_all<T>::type m_max;
};
template <typename T> struct MinReducer
@@ -53,9 +53,73 @@ template <typename T> struct MinReducer
}
private:
- T m_min;
+ typename internal::remove_all<T>::type m_min;
};
+
+#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)
+// We're not compiling a cuda kernel
+template <typename T> struct UniformRandomGenerator {
+ template<typename Index>
+ T operator()(Index, Index = 0) const {
+ return random<T>();
+ }
+ template<typename Index>
+ typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
+ const int packetSize = internal::packet_traits<T>::size;
+ EIGEN_ALIGN_DEFAULT T values[packetSize];
+ for (int i = 0; i < packetSize; ++i) {
+ values[i] = random<T>();
+ }
+ return internal::pload<typename internal::packet_traits<T>::type>(values);
+ }
+};
+
+#else
+
+// We're compiling a cuda kernel
+template <typename T> struct UniformRandomGenerator;
+
+template <> struct UniformRandomGenerator<float> {
+ UniformRandomGenerator() {
+ const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+ curand_init(0, tid, 0, &m_state);
+ }
+
+ template<typename Index>
+ float operator()(Index, Index = 0) const {
+ return curand_uniform(&m_state);
+ }
+ template<typename Index>
+ float4 packetOp(Index, Index = 0) const {
+ return curand_uniform4(&m_state);
+ }
+
+ private:
+ mutable curandStatePhilox4_32_10_t m_state;
+};
+
+template <> struct UniformRandomGenerator<double> {
+ UniformRandomGenerator() {
+ const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+ curand_init(0, tid, 0, &m_state);
+ }
+ template<typename Index>
+ double operator()(Index, Index = 0) const {
+ return curand_uniform_double(&m_state);
+ }
+ template<typename Index>
+ double2 packetOp(Index, Index = 0) const {
+ return curand_uniform2_double(&m_state);
+ }
+
+ private:
+ mutable curandStatePhilox4_32_10_t m_state;
+};
+
+#endif
+
+
} // end namespace internal
} // end namespace Eigen
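
A usage sketch of the new CPU generator, assuming this snapshot's include path and the operator()(Index, Index = 0) calling convention shown above. FillWithRandom is a hypothetical helper, not Eigen API; it consumes the functor one coefficient at a time, the way a nullary tensor expression would on the scalar path (the GPU specializations follow the same convention but draw from a per-thread cuRAND Philox state).

// Hypothetical usage sketch, not part of the patch.
#include <vector>
#include <unsupported/Eigen/CXX11/Tensor>

template <typename T, typename Generator>
std::vector<T> FillWithRandom(int n, const Generator& gen) {
  std::vector<T> out(n);
  for (int i = 0; i < n; ++i) {
    out[i] = gen(i);  // scalar path: operator()(Index, Index = 0)
  }
  return out;
}

int main() {
  Eigen::internal::UniformRandomGenerator<float> gen;
  std::vector<float> samples = FillWithRandom<float>(16, gen);
  return samples.empty() ? 1 : 0;
}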