about summary refs log tree commit diff homepage
path: root/unsupported
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-06-09 10:29:52 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-06-09 10:29:52 -0700
commit37638dafd71e39407d22d4269b32d1c73b84feb8 (patch)
tree7e3c3855a6690334deb696e41c47090d67bba692 /unsupported
parent66796e843df723eeac04d6dc725f6a8b27a574ba (diff)
Simplified the code that dispatches vectorized reductions on GPU
Diffstat (limited to 'unsupported')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h10
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h38
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h2
3 files changed, 31 insertions, 19 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index e6ff70460..a8e48fced 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -131,7 +131,7 @@ template <typename T, typename Device>
struct reducer_traits<SumReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
- PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasAdd
+ PacketAccess = PacketType<T, Device>::HasAdd
};
};
@@ -183,7 +183,7 @@ template <typename T, typename Device>
struct reducer_traits<MeanReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
- PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasAdd
+ PacketAccess = PacketType<T, Device>::HasAdd
};
};
@@ -225,7 +225,7 @@ template <typename T, typename Device>
struct reducer_traits<MaxReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
- PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMax
+ PacketAccess = PacketType<T, Device>::HasMax
};
};
@@ -267,7 +267,7 @@ template <typename T, typename Device>
struct reducer_traits<MinReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
- PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMin
+ PacketAccess = PacketType<T, Device>::HasMin
};
};
@@ -310,7 +310,7 @@ template <typename T, typename Device>
struct reducer_traits<ProdReducer<T>, Device> {
enum {
Cost = NumTraits<T>::MulCost,
- PacketAccess = packet_traits<typename PacketType<T, Device>::type>::HasMul
+ PacketAccess = PacketType<T, Device>::HasMul
};
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
index 82a905a65..0f3778e6e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -47,27 +47,39 @@ template <> struct max_n_1<0> {
// Default packet types
template <typename Scalar, typename Device>
-struct PacketType {
+struct PacketType : internal::packet_traits<Scalar> {
typedef typename internal::packet_traits<Scalar>::type type;
- enum { size = internal::unpacket_traits<type>::size };
};
// For CUDA packet types when using a GpuDevice
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
template <>
- struct PacketType<half, GpuDevice> {
+struct PacketType<half, GpuDevice> {
typedef half2 type;
static const int size = 2;
- };
-template <>
-struct PacketType<float, GpuDevice> {
- typedef float4 type;
- static const int size = 4;
-};
-template <>
-struct PacketType<double, GpuDevice> {
- typedef double2 type;
- static const int size = 2;
+ enum {
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasNegate = 1,
+ HasAbs = 1,
+ HasArg = 0,
+ HasAbs2 = 0,
+ HasMin = 1,
+ HasMax = 1,
+ HasConj = 0,
+ HasSetLinear = 0,
+ HasBlend = 0,
+
+ HasDiv = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasExp = 1,
+ HasLog = 1,
+ HasLog1p = 0,
+ HasLog10 = 0,
+ HasPow = 1,
+ };
};
#endif
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 1b4fdd03f..d9bbcd858 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -328,7 +328,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
// Unfortunately nvidia doesn't support well exotic types such as complex,
// so reduce the scope of the optimized version of the code to the simple case
// of floats and half floats.
- #ifdef EIGEN_HAS_CUDA_FP16
+#ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));