aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-07-15 12:38:34 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-07-15 12:38:34 -0700
commite892524efe7e8adbd43bf4c1c150f4f4ebf27d1d (patch)
tree4bc0c43b27ec1ee6a595baf0e56904da5b309855 /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
parentf5aa64086228ca9ccfa27e6086667fd0bdbad22c (diff)
Added support for multi gpu configuration to the GpuDevice class
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 8
1 file changed, 4 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 24606b0c8..a795f8eaa 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -205,8 +205,8 @@ class TensorExecutor<Expression, GpuDevice, false>
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign)
{
- const int num_blocks = getNumCudaMultiProcessors() * maxCudaThreadsPerMultiProcessor() / maxCudaThreadsPerBlock();
- const int block_size = maxCudaThreadsPerBlock();
+ const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock();
+ const int block_size = device.maxCudaThreadsPerBlock();
const Index size = array_prod(evaluator.dimensions());
LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size);
}
@@ -225,8 +225,8 @@ class TensorExecutor<Expression, GpuDevice, true>
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign)
{
- const int num_blocks = getNumCudaMultiProcessors() * maxCudaThreadsPerMultiProcessor() / maxCudaThreadsPerBlock();
- const int block_size = maxCudaThreadsPerBlock();
+ const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock();
+ const int block_size = device.maxCudaThreadsPerBlock();
const Index size = array_prod(evaluator.dimensions());
LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size);
}