diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-06-09 10:29:52 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-06-09 10:29:52 -0700 |
commit | 37638dafd71e39407d22d4269b32d1c73b84feb8 (patch) | |
tree | 7e3c3855a6690334deb696e41c47090d67bba692 /unsupported/Eigen | |
parent | 66796e843df723eeac04d6dc725f6a8b27a574ba (diff) |
Simplified the code that dispatches vectorized reductions on GPU
Diffstat (limited to 'unsupported/Eigen')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 10 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 38 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 2 |
3 files changed, 31 insertions, 19 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index e6ff70460..a8e48fced 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -131,7 +131,7 @@ template <typename T, typename Device>
 struct reducer_traits<SumReducer<T>, Device> {
   enum {
     Cost = NumTraits<T>::AddCost,
-    PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasAdd
+    PacketAccess = PacketType<T, Device>::HasAdd
   };
 };
 
@@ -183,7 +183,7 @@ template <typename T, typename Device>
 struct reducer_traits<MeanReducer<T>, Device> {
   enum {
     Cost = NumTraits<T>::AddCost,
-    PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasAdd
+    PacketAccess = PacketType<T, Device>::HasAdd
   };
 };
 
@@ -225,7 +225,7 @@ template <typename T, typename Device>
 struct reducer_traits<MaxReducer<T>, Device> {
   enum {
     Cost = NumTraits<T>::AddCost,
-    PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMax
+    PacketAccess = PacketType<T, Device>::HasMax
   };
 };
 
@@ -267,7 +267,7 @@ template <typename T, typename Device>
 struct reducer_traits<MinReducer<T>, Device> {
   enum {
     Cost = NumTraits<T>::AddCost,
-    PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMin
+    PacketAccess = PacketType<T, Device>::HasMin
   };
 };
 
@@ -310,7 +310,7 @@ template <typename T, typename Device>
 struct reducer_traits<ProdReducer<T>, Device> {
   enum {
     Cost = NumTraits<T>::MulCost,
-    PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMul
+    PacketAccess = PacketType<T, Device>::HasMul
   };
 };
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
index 82a905a65..0f3778e6e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -47,27 +47,39 @@ template <> struct max_n_1<0> {
 
 // Default packet types
 template <typename Scalar, typename Device>
-struct PacketType {
+struct PacketType : internal::packet_traits<Scalar> {
   typedef typename internal::packet_traits<Scalar>::type type;
-  enum { size = internal::unpacket_traits<type>::size };
 };
 
 // For CUDA packet types when using a GpuDevice
 #if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
 template <>
-  struct PacketType<half, GpuDevice> {
+struct PacketType<half, GpuDevice> {
   typedef half2 type;
   static const int size = 2;
-  };
-template <>
-struct PacketType<float, GpuDevice> {
-  typedef float4 type;
-  static const int size = 4;
-};
-template <>
-struct PacketType<double, GpuDevice> {
-  typedef double2 type;
-  static const int size = 2;
+  enum {
+    HasAdd = 1,
+    HasSub = 1,
+    HasMul = 1,
+    HasNegate = 1,
+    HasAbs = 1,
+    HasArg = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0,
+    HasBlend = 0,
+
+    HasDiv = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasExp = 1,
+    HasLog = 1,
+    HasLog1p = 0,
+    HasLog10 = 0,
+    HasPow = 1,
+  };
 };
 #endif
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 1b4fdd03f..d9bbcd858 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -328,7 +328,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
   // Unfortunately nvidia doesn't support well exotic types such as complex,
   // so reduce the scope of the optimized version of the code to the simple case
   // of floats and half floats.
-  #ifdef EIGEN_HAS_CUDA_FP16
+#ifdef EIGEN_HAS_CUDA_FP16
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       (internal::is_same<typename Self::CoeffReturnType, float>::value ||
        (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
```