Made it possible to limit the number of blocks that will be used to evaluate a tensor expression on a CUDA device. This makes it possible to set aside streaming multiprocessors for other computations.

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-01 12:46:32 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-01 12:46:32 -0800
commit: 6b5dff875e4ba2235f255b7cf0a86b7abed21df0 (patch)
tree: 9dbf00da5c4e1b0a9689422b8faf5448af750416 /unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
parent: 264f8141f86e84312f0eea9e741d2260ed839890 (diff)
1 files changed, 9 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
index 5abdc489b..e684ab8f7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface {
 struct GpuDevice {
   // The StreamInterface is not owned: the caller is
   // responsible for its initialization and eventual destruction.
-  explicit GpuDevice(const StreamInterface* stream) : stream_(stream) {
+  explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) {
+    eigen_assert(stream);
+  }
+  explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
     eigen_assert(stream);
   }
-
   // TODO(bsteiner): This is an internal API, we should not expose it.
   EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
     return stream_->stream();
@@ -246,6 +248,10 @@ struct GpuDevice {
 #endif
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
+    return max_blocks_;
+  }
+
   // This function checks if the CUDA runtime recorded an error for the
   // underlying stream device.
   inline bool ok() const {
@@ -259,7 +265,7 @@ struct GpuDevice {
 
  private:
   const StreamInterface* stream_;
-
+  int max_blocks_;
 };
 
 #ifndef __CUDA_ARCH__
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-02-01 12:46:32 -0800
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-02-01 12:46:32 -0800
commit	6b5dff875e4ba2235f255b7cf0a86b7abed21df0 (patch)
tree	9dbf00da5c4e1b0a9689422b8faf5448af750416 /unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
parent	264f8141f86e84312f0eea9e741d2260ed839890 (diff)