From 6b5dff875e4ba2235f255b7cf0a86b7abed21df0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 1 Feb 2016 12:46:32 -0800 Subject: Made it possible to limit the number of blocks that will be used to evaluate a tensor expression on a CUDA device. This makesit possible to set aside streaming multiprocessors for other computations. --- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 5abdc489b..e684ab8f7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface { struct GpuDevice { // The StreamInterface is not owned: the caller is // responsible for its initialization and eventual destruction. - explicit GpuDevice(const StreamInterface* stream) : stream_(stream) { + explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) { + eigen_assert(stream); + } + explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) { eigen_assert(stream); } - // TODO(bsteiner): This is an internal API, we should not expose it. EIGEN_STRONG_INLINE const cudaStream_t& stream() const { return stream_->stream(); @@ -246,6 +248,10 @@ struct GpuDevice { #endif } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const { + return max_blocks_; + } + // This function checks if the CUDA runtime recorded an error for the // underlying stream device. inline bool ok() const { @@ -259,7 +265,7 @@ struct GpuDevice { private: const StreamInterface* stream_; - + int max_blocks_; }; #ifndef __CUDA_ARCH__ -- cgit v1.2.3