`__CUDA_ARCH__` isn't always defined, so avoid relying on it too much when figuring out which implementation to use for reductions. Instead, rely on the device to tell us which hardware version we're running on.

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-08-03 10:00:43 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-08-03 10:00:43 -0700
commit: a20b58845f5f457375a91ec7e8acdeee2f920d33 (patch)
tree: cd762252deb305386b4690d11bb81ef5005b343b /unsupported
parent: 819d0cea1b8cc27c55c94886c85a0a3efc155a50 (diff)
1 files changed, 2 insertions, 10 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index d3894e625..5e512490c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -336,11 +336,9 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       (internal::is_same<typename Self::CoeffReturnType, float>::value ||
        (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
-#elif __CUDA_ARCH__ >= 300
+#else
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                  internal::is_same<typename Self::CoeffReturnType, float>::value;
-#else
-  static const bool HasOptimizedImplementation = false;
 #endif
 
   template <typename OutputType>
@@ -619,11 +617,9 @@ struct InnerReducer<Self, Op, GpuDevice> {
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       (internal::is_same<typename Self::CoeffReturnType, float>::value ||
        (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
-#elif __CUDA_ARCH__ >= 300
+#else
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                  internal::is_same<typename Self::CoeffReturnType, float>::value;
-#else
-  static const bool HasOptimizedImplementation = false;
 #endif
 
   template <typename OutputType>
@@ -678,12 +674,8 @@ struct OuterReducer<Self, Op, GpuDevice> {
   // Unfortunately nvidia doesn't support well exotic types such as complex,
   // so reduce the scope of the optimized version of the code to the simple case
   // of floats.
-#if __CUDA_ARCH__ >= 300
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                  internal::is_same<typename Self::CoeffReturnType, float>::value;
-#else
-  static const bool HasOptimizedImplementation = false;
-#endif
 
   template <typename Device, typename OutputType>
   static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-08-03 10:00:43 -0700
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-08-03 10:00:43 -0700
commit	a20b58845f5f457375a91ec7e8acdeee2f920d33 (patch)
tree	cd762252deb305386b4690d11bb81ef5005b343b /unsupported
parent	819d0cea1b8cc27c55c94886c85a0a3efc155a50 (diff)