From 37638dafd71e39407d22d4269b32d1c73b84feb8 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 10:29:52 -0700 Subject: Simplified the code that dispatches vectorized reductions on GPU --- .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 10 +++--- unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 38 ++++++++++++++-------- .../Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 2 +- 3 files changed, 31 insertions(+), 19 deletions(-) (limited to 'unsupported/Eigen/CXX11/src') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index e6ff70460..a8e48fced 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -131,7 +131,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasAdd + PacketAccess = PacketType::HasAdd }; }; @@ -183,7 +183,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasAdd + PacketAccess = PacketType::HasAdd }; }; @@ -225,7 +225,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasMax + PacketAccess = PacketType::HasMax }; }; @@ -267,7 +267,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasMin + PacketAccess = PacketType::HasMin }; }; @@ -310,7 +310,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::MulCost, - PacketAccess = packet_traits::type>::HasMul + PacketAccess = PacketType::HasMul }; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 82a905a65..0f3778e6e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -47,27 +47,39 @@ template <> struct max_n_1<0> { // Default packet types template -struct PacketType { +struct PacketType : internal::packet_traits { typedef typename internal::packet_traits::type type; - enum { size = internal::unpacket_traits::size }; }; // For CUDA packet types when using a GpuDevice #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template <> - struct PacketType { +struct PacketType { typedef half2 type; static const int size = 2; - }; -template <> -struct PacketType { - typedef float4 type; - static const int size = 4; -}; -template <> -struct PacketType { - typedef double2 type; - static const int size = 2; + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasArg = 0, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0, + + HasDiv = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasExp = 1, + HasLog = 1, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 1, + }; }; #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 1b4fdd03f..d9bbcd858 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -328,7 +328,7 @@ struct FullReducer { // Unfortunately nvidia doesn't support well exotic types such as complex, // so reduce the scope of the optimized version of the code to the simple case // of floats and half floats. - #ifdef EIGEN_HAS_CUDA_FP16 +#ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || (internal::is_same::value && reducer_traits::PacketAccess)); -- cgit v1.2.3