diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-07-15 12:38:34 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-07-15 12:38:34 -0700 |
commit | e892524efe7e8adbd43bf4c1c150f4f4ebf27d1d (patch) | |
tree | 4bc0c43b27ec1ee6a595baf0e56904da5b309855 /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | |
parent | f5aa64086228ca9ccfa27e6086667fd0bdbad22c (diff) |
Added support for multi gpu configuration to the GpuDevice class
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 8 |
1 file changed, 4 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 24606b0c8..a795f8eaa 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -205,8 +205,8 @@ class TensorExecutor<Expression, GpuDevice, false> const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const int num_blocks = getNumCudaMultiProcessors() * maxCudaThreadsPerMultiProcessor() / maxCudaThreadsPerBlock(); - const int block_size = maxCudaThreadsPerBlock(); + const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); + const int block_size = device.maxCudaThreadsPerBlock(); const Index size = array_prod(evaluator.dimensions()); LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size); } @@ -225,8 +225,8 @@ class TensorExecutor<Expression, GpuDevice, true> const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const int num_blocks = getNumCudaMultiProcessors() * maxCudaThreadsPerMultiProcessor() / maxCudaThreadsPerBlock(); - const int block_size = maxCudaThreadsPerBlock(); + const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); + const int block_size = device.maxCudaThreadsPerBlock(); const Index size = array_prod(evaluator.dimensions()); LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size); } |