diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-06-09 08:22:27 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-06-09 08:22:27 -0700 |
commit | aa33446dace833fbf06632e586c80119b3d8ac11 (patch) | |
tree | d7bc59330e69e19de37da9fffc64016ee0febec0 /unsupported/Eigen/CXX11/src/Tensor | |
parent | 15890c304edbccedc8a989468ed3fc475f428059 (diff) |
Improved support for vectorization of 16-bit floats
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 86 |
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 5 |
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 8 |
3 files changed, 95 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 3dd32e9d1..bf52e490f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -84,6 +84,14 @@ struct functor_traits<scalar_sigmoid_op<T> > { }; +template<typename Reducer, typename Device> +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + // Standard reduction functors template <typename T> struct SumReducer { @@ -119,6 +127,15 @@ template <typename T> struct SumReducer } }; +template <typename T, typename Device> +struct reducer_traits<SumReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = PacketType<T, Device>::type::HasAdd + }; +}; + + template <typename T> struct MeanReducer { static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger; @@ -162,6 +179,15 @@ template <typename T> struct MeanReducer DenseIndex packetCount_; }; +template <typename T, typename Device> +struct reducer_traits<MeanReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = PacketType<T, Device>::type::HasAdd + }; +}; + + template <typename T> struct MaxReducer { static const bool PacketAccess = packet_traits<T>::HasMax; @@ -195,6 +221,15 @@ template <typename T> struct MaxReducer } }; +template <typename T, typename Device> +struct reducer_traits<MaxReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = PacketType<T, Device>::type::HasMax + }; +}; + + template <typename T> struct MinReducer { static const bool PacketAccess = packet_traits<T>::HasMin; @@ -228,6 +263,14 @@ template <typename T> struct MinReducer } }; +template <typename T, typename Device> +struct reducer_traits<MinReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = PacketType<T, Device>::type::HasMin + }; +}; + template <typename T> struct ProdReducer { @@ -263,6 +306,14 @@ 
template <typename T> struct ProdReducer } }; +template <typename T, typename Device> +struct reducer_traits<ProdReducer<T>, Device> { + enum { + Cost = NumTraits<T>::MulCost, + PacketAccess = PacketType<T, Device>::type::HasMul + }; +}; + struct AndReducer { @@ -280,6 +331,15 @@ struct AndReducer } }; +template <typename Device> +struct reducer_traits<AndReducer, Device> { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + struct OrReducer { static const bool PacketAccess = false; static const bool IsStateful = false; @@ -295,6 +355,15 @@ struct OrReducer { } }; +template <typename Device> +struct reducer_traits<OrReducer, Device> { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + // Argmin/Argmax reducers template <typename T> struct ArgMaxTupleReducer { @@ -312,6 +381,15 @@ template <typename T> struct ArgMaxTupleReducer } }; +template <typename T, typename Device> +struct reducer_traits<ArgMaxTupleReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = false + }; +}; + + template <typename T> struct ArgMinTupleReducer { static const bool PacketAccess = false; @@ -328,6 +406,14 @@ template <typename T> struct ArgMinTupleReducer } }; +template <typename T, typename Device> +struct reducer_traits<ArgMinTupleReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = false + }; +}; + // Random number generation namespace { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index b1645d56f..82a905a65 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -55,6 +55,11 @@ struct PacketType { // For CUDA packet types when using a GpuDevice #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template <> + struct PacketType<half, GpuDevice> { + typedef half2 type; + static const int size = 2; + }; +template <> struct PacketType<float, GpuDevice> { typedef float4 type; static const int size = 4; 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index e82530955..1b4fdd03f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -331,7 +331,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> { #ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same<typename Self::CoeffReturnType, float>::value || - (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess)); + (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); #else static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same<typename Self::CoeffReturnType, float>::value; @@ -346,7 +346,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> { return; } - FullReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs); + FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs); } }; @@ -608,7 +608,7 @@ struct InnerReducer<Self, Op, GpuDevice> { #ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same<typename Self::CoeffReturnType, float>::value || - (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess)); + (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); #else static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same<typename Self::CoeffReturnType, float>::value; @@ -627,7 +627,7 @@ struct InnerReducer<Self, Op, GpuDevice> { return true; } - return InnerReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, 
num_coeffs_to_reduce, num_preserved_vals); + return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); } }; |