diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-05-10 17:04:01 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-05-10 17:04:01 -0700 |
commit | 4ede059de17f54b5c29b27e86486016caef0b795 (patch) | |
tree | 752f18086942c5684ababfee593b2330c8649d5c | |
parent | bf185c3c28073c5fd2c2e88cfcd2438bf1074677 (diff) |
Properly gate the use of half2.
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 4 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 2 |
2 files changed, 6 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 97f4b34b3..71061293b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -323,10 +323,12 @@ template <int B, int N, typename S, typename R, typename I> __global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); +#ifdef EIGEN_HAS_CUDA_FP16 template <typename S, typename R, typename I> __global__ void ReductionInitKernelHalfFloat(R, const S, I, half2*); template <int B, int N, typename S, typename R, typename I> __global__ void FullReductionKernelHalfFloat(R, const S, I, half*, half2*); +#endif template <int NPT, typename S, typename R, typename I> __global__ void InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); @@ -624,8 +626,10 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> #endif #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); +#ifdef EIGEN_HAS_CUDA_FP16 template <typename S, typename R, typename I> friend void internal::ReductionInitKernelHalfFloat(R, const S, I, half2*); template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); +#endif template <int NPT, typename S, typename R, typename I> friend void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); template <int NPT, typename S, typename R, typename I> friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index afa1a2697..6db9e63c8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -68,6 +68,7 @@ __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) } +#ifdef EIGEN_HAS_CUDA_FP16 template <template <typename T> class R> __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer) { #if __CUDA_ARCH__ >= 300 @@ -90,6 +91,7 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer assert(0 && "Shouldn't be called on unsupported device"); #endif } +#endif template <> __device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) { |