aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-02-01 12:46:32 -0800
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-02-01 12:46:32 -0800
commit6b5dff875e4ba2235f255b7cf0a86b7abed21df0 (patch)
tree9dbf00da5c4e1b0a9689422b8faf5448af750416 /unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
parent264f8141f86e84312f0eea9e741d2260ed839890 (diff)
Made it possible to limit the number of blocks that will be used to evaluate a tensor expression on a CUDA device. This makesit possible to set aside streaming multiprocessors for other computations.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h12
1 files changed, 9 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
index 5abdc489b..e684ab8f7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface {
struct GpuDevice {
// The StreamInterface is not owned: the caller is
// responsible for its initialization and eventual destruction.
- explicit GpuDevice(const StreamInterface* stream) : stream_(stream) {
+ explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) {
+ eigen_assert(stream);
+ }
+ explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
eigen_assert(stream);
}
-
// TODO(bsteiner): This is an internal API, we should not expose it.
EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
return stream_->stream();
@@ -246,6 +248,10 @@ struct GpuDevice {
#endif
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
+ return max_blocks_;
+ }
+
// This function checks if the CUDA runtime recorded an error for the
// underlying stream device.
inline bool ok() const {
@@ -259,7 +265,7 @@ struct GpuDevice {
private:
const StreamInterface* stream_;
-
+ int max_blocks_;
};
#ifndef __CUDA_ARCH__