diff options
author | Sami Kama <sami.kama.git@gmail.com> | 2020-03-10 20:28:43 +0000 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2020-03-10 20:28:43 +0000 |
commit | b733b8b680885c0fcdfddea5423171468609b5a6 (patch) | |
tree | 1174a4651bbdbe979a8bd33e97edf4011c8cc7e4 /unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | |
parent | a45d28256d020a4e871267c9bf00206fe9d2265e (diff) |
remove duplicate pset1 for half and add some comments about why we need to expose pmul/add/div/min/max on host
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 8 |
1 files changed, 4 insertions, 4 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 5ca694062..8332a9ae0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -420,9 +420,9 @@ __global__ void FullReductionKernel(R, const S, I_, typename S::CoeffReturnType*
 
 #if defined(EIGEN_HAS_GPU_FP16)
 template <typename S, typename R, typename I_>
-__global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I_, half2*);
+__global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I_, internal::packet_traits<half>::type*);
 template <int B, int N, typename S, typename R, typename I_>
-__global__ void FullReductionKernelHalfFloat(R, const S, I_, half*, half2*);
+__global__ void FullReductionKernelHalfFloat(R, const S, I_, half*, internal::packet_traits<half>::type*);
 template <int NPT, typename S, typename R, typename I_>
 __global__ void InnerReductionKernelHalfFloat(R, const S, I_, I_, half*);
 
@@ -863,8 +863,8 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
 #if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))
   template <int B, int N, typename S, typename R, typename I_> KERNEL_FRIEND void internal::FullReductionKernel(R, const S, I_, typename S::CoeffReturnType*, unsigned int*);
 #if defined(EIGEN_HAS_GPU_FP16)
-  template <typename S, typename R, typename I_> KERNEL_FRIEND void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I_, half2*);
-  template <int B, int N, typename S, typename R, typename I_> KERNEL_FRIEND void internal::FullReductionKernelHalfFloat(R, const S, I_, half*, half2*);
+  template <typename S, typename R, typename I_> KERNEL_FRIEND void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I_, internal::packet_traits<Eigen::half>::type*);
+  template <int B, int N, typename S, typename R, typename I_> KERNEL_FRIEND void internal::FullReductionKernelHalfFloat(R, const S, I_, half*, internal::packet_traits<Eigen::half>::type*);
   template <int NPT, typename S, typename R, typename I_> KERNEL_FRIEND void internal::InnerReductionKernelHalfFloat(R, const S, I_, I_, half*);
 #endif
   template <int NPT, typename S, typename R, typename I_> KERNEL_FRIEND void internal::InnerReductionKernel(R, const S, I_, I_, typename S::CoeffReturnType*);
```