remove duplicate pset1 for half and add some comments about why we need to expose pmul/add/div/min/max on host

author: Sami Kama <sami.kama.git@gmail.com> 2020-03-10 20:28:43 +0000
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2020-03-10 20:28:43 +0000
commit: b733b8b680885c0fcdfddea5423171468609b5a6 (patch)
tree: 1174a4651bbdbe979a8bd33e97edf4011c8cc7e4 /unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
parent: a45d28256d020a4e871267c9bf00206fe9d2265e (diff)
1 files changed, 4 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 5ca694062..8332a9ae0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -420,9 +420,9 @@ __global__ void FullReductionKernel(R, const S, I_, typename S::CoeffReturnType*
 
 #if defined(EIGEN_HAS_GPU_FP16)
 template <typename S, typename R, typename I_>
-__global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I_, half2*);
+__global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I_, internal::packet_traits<half>::type*);
 template <int B, int N, typename S, typename R, typename I_>
-__global__ void FullReductionKernelHalfFloat(R, const S, I_, half*, half2*);
+__global__ void FullReductionKernelHalfFloat(R, const S, I_, half*, internal::packet_traits<half>::type*);
 template <int NPT, typename S, typename R, typename I_>
 __global__ void InnerReductionKernelHalfFloat(R, const S, I_, I_, half*);
 
@@ -863,8 +863,8 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
 #if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))
   template <int B, int N, typename S, typename R, typename I_> KERNEL_FRIEND void internal::FullReductionKernel(R, const S, I_, typename S::CoeffReturnType*, unsigned int*);
 #if defined(EIGEN_HAS_GPU_FP16)
-  template <typename S, typename R, typename I_> KERNEL_FRIEND void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I_, half2*);
-  template <int B, int N, typename S, typename R, typename I_> KERNEL_FRIEND void internal::FullReductionKernelHalfFloat(R, const S, I_, half*, half2*);
+  template <typename S, typename R, typename I_> KERNEL_FRIEND void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I_, internal::packet_traits<Eigen::half>::type*);
+  template <int B, int N, typename S, typename R, typename I_> KERNEL_FRIEND void internal::FullReductionKernelHalfFloat(R, const S, I_, half*, internal::packet_traits<Eigen::half>::type*);
   template <int NPT, typename S, typename R, typename I_> KERNEL_FRIEND void internal::InnerReductionKernelHalfFloat(R, const S, I_, I_, half*);
 #endif
   template <int NPT, typename S, typename R, typename I_> KERNEL_FRIEND void internal::InnerReductionKernel(R, const S, I_, I_, typename S::CoeffReturnType*);
author	Sami Kama <sami.kama.git@gmail.com>	2020-03-10 20:28:43 +0000
committer	Rasmus Munk Larsen <rmlarsen@google.com>	2020-03-10 20:28:43 +0000
commit	b733b8b680885c0fcdfddea5423171468609b5a6 (patch)
tree	1174a4651bbdbe979a8bd33e97edf4011c8cc7e4 /unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
parent	a45d28256d020a4e871267c9bf00206fe9d2265e (diff)