From 37638dafd71e39407d22d4269b32d1c73b84feb8 Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Thu, 9 Jun 2016 10:29:52 -0700
Subject: Simplified the code that dispatches vectorized reductions on GPU

---
 unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 38 ++++++++++++++++---------
 1 file changed, 25 insertions(+), 13 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
index 82a905a65..0f3778e6e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -47,27 +47,39 @@ template <> struct max_n_1<0> {
 
 // Default packet types
 template <typename Scalar, typename Device>
-struct PacketType {
+struct PacketType : internal::packet_traits<Scalar> {
   typedef typename internal::packet_traits<Scalar>::type type;
-  enum { size = internal::unpacket_traits<type>::size };
 };
 
 // For CUDA packet types when using a GpuDevice
 #if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
 template <>
-  struct PacketType<half, GpuDevice> {
+struct PacketType<half, GpuDevice> {
   typedef half2 type;
   static const int size = 2;
-  };
-template <>
-struct PacketType<float, GpuDevice> {
-  typedef float4 type;
-  static const int size = 4;
-};
-template <>
-struct PacketType<double, GpuDevice> {
-  typedef double2 type;
-  static const int size = 2;
+  enum {
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasNegate = 1,
+    HasAbs    = 1,
+    HasArg    = 0,
+    HasAbs2   = 0,
+    HasMin    = 1,
+    HasMax    = 1,
+    HasConj   = 0,
+    HasSetLinear = 0,
+    HasBlend  = 0,
+
+    HasDiv    = 1,
+    HasSqrt   = 1,
+    HasRsqrt  = 1,
+    HasExp    = 1,
+    HasLog    = 1,
+    HasLog1p  = 0,
+    HasLog10  = 0,
+    HasPow    = 1,
+  };
 };
 #endif
 
-- 
cgit v1.2.3