Properly gate the use of half and half2 behind EIGEN_HAS_CUDA_FP16.

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-05-10 17:04:01 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-05-10 17:04:01 -0700
commit: 4ede059de17f54b5c29b27e86486016caef0b795 (patch)
tree: 752f18086942c5684ababfee593b2330c8649d5c
parent: bf185c3c28073c5fd2c2e88cfcd2438bf1074677 (diff)
2 files changed, 6 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 97f4b34b3..71061293b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -323,10 +323,12 @@ template <int B, int N, typename S, typename R, typename I>
 __global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*);
 
 
+#ifdef EIGEN_HAS_CUDA_FP16
 template <typename S, typename R, typename I>
 __global__ void ReductionInitKernelHalfFloat(R, const S, I, half2*);
 template <int B, int N, typename S, typename R, typename I>
 __global__ void FullReductionKernelHalfFloat(R, const S, I, half*, half2*);
+#endif
 
 template <int NPT, typename S, typename R, typename I>
 __global__ void InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*);
@@ -624,8 +626,10 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
 #endif
 #if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
   template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*);
+#ifdef EIGEN_HAS_CUDA_FP16
   template <typename S, typename R, typename I> friend void internal::ReductionInitKernelHalfFloat(R, const S, I, half2*);
   template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*);
+#endif
   template <int NPT, typename S, typename R, typename I> friend void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*);
   template <int NPT, typename S, typename R, typename I> friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*);
 #endif
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index afa1a2697..6db9e63c8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -68,6 +68,7 @@ __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer)
 }
 
 
+#ifdef EIGEN_HAS_CUDA_FP16
 template <template <typename T> class R>
 __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer) {
 #if __CUDA_ARCH__ >= 300
@@ -90,6 +91,7 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer
   assert(0 && "Shouldn't be called on unsupported device");
 #endif
 }
+#endif
 
 template <>
 __device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) {
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-05-10 17:04:01 -0700
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-05-10 17:04:01 -0700
commit	4ede059de17f54b5c29b27e86486016caef0b795 (patch)
tree	752f18086942c5684ababfee593b2330c8649d5c
parent	bf185c3c28073c5fd2c2e88cfcd2438bf1074677 (diff)