aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-06-29 15:42:01 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-06-29 15:42:01 -0700
commitcb2d8b8fa6e37b9023919ee3d2ef759fcf400e0f (patch)
treed7b6dce0dea9d5b61bd420d5aa8c43596f419324 /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parentb2a47641ce0ad0642d93db0030cbf8cd0bb7f2c0 (diff)
Made it possible to compile reductions for an old CUDA architecture and run them on a recent GPU.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h12
1 file changed, 10 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 5e512490c..d3894e625 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -336,9 +336,11 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
-#else
+#elif __CUDA_ARCH__ >= 300
static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value;
+#else
+ static const bool HasOptimizedImplementation = false;
#endif
template <typename OutputType>
@@ -617,9 +619,11 @@ struct InnerReducer<Self, Op, GpuDevice> {
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
-#else
+#elif __CUDA_ARCH__ >= 300
static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value;
+#else
+ static const bool HasOptimizedImplementation = false;
#endif
template <typename OutputType>
@@ -674,8 +678,12 @@ struct OuterReducer<Self, Op, GpuDevice> {
// Unfortunately nvidia doesn't support well exotic types such as complex,
// so reduce the scope of the optimized version of the code to the simple case
// of floats.
+#if __CUDA_ARCH__ >= 300
static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value;
+#else
+ static const bool HasOptimizedImplementation = false;
+#endif
template <typename Device, typename OutputType>
static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {