diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-02-01 12:46:32 -0800 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-02-01 12:46:32 -0800 |
commit | 6b5dff875e4ba2235f255b7cf0a86b7abed21df0 (patch) | |
tree | 9dbf00da5c4e1b0a9689422b8faf5448af750416 /unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | |
parent | 264f8141f86e84312f0eea9e741d2260ed839890 (diff) |
Made it possible to limit the number of blocks that will be used to evaluate a tensor expression on a CUDA device. This makesit possible to set aside streaming multiprocessors for other computations.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 12 |
1 files changed, 9 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 5abdc489b..e684ab8f7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface { struct GpuDevice { // The StreamInterface is not owned: the caller is // responsible for its initialization and eventual destruction. - explicit GpuDevice(const StreamInterface* stream) : stream_(stream) { + explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) { + eigen_assert(stream); + } + explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) { eigen_assert(stream); } - // TODO(bsteiner): This is an internal API, we should not expose it. EIGEN_STRONG_INLINE const cudaStream_t& stream() const { return stream_->stream(); @@ -246,6 +248,10 @@ struct GpuDevice { #endif } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const { + return max_blocks_; + } + // This function checks if the CUDA runtime recorded an error for the // underlying stream device. inline bool ok() const { @@ -259,7 +265,7 @@ struct GpuDevice { private: const StreamInterface* stream_; - + int max_blocks_; }; #ifndef __CUDA_ARCH__ |