diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-07-15 12:38:34 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-07-15 12:38:34 -0700 |
commit | e892524efe7e8adbd43bf4c1c150f4f4ebf27d1d (patch) | |
tree | 4bc0c43b27ec1ee6a595baf0e56904da5b309855 /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | |
parent | f5aa64086228ca9ccfa27e6086667fd0bdbad22c (diff) |
Added support for multi gpu configuration to the GpuDevice class
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 8 |
1 file changed, 4 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 24606b0c8..a795f8eaa 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -205,8 +205,8 @@ class TensorExecutor<Expression, GpuDevice, false> const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const int num_blocks = getNumCudaMultiProcessors() * maxCudaThreadsPerMultiProcessor() / maxCudaThreadsPerBlock(); - const int block_size = maxCudaThreadsPerBlock(); + const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); + const int block_size = device.maxCudaThreadsPerBlock(); const Index size = array_prod(evaluator.dimensions()); LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size); } @@ -225,8 +225,8 @@ class TensorExecutor<Expression, GpuDevice, true> const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const int num_blocks = getNumCudaMultiProcessors() * maxCudaThreadsPerMultiProcessor() / maxCudaThreadsPerBlock(); - const int block_size = maxCudaThreadsPerBlock(); + const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); + const int block_size = device.maxCudaThreadsPerBlock(); const Index size = array_prod(evaluator.dimensions()); LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable<TensorEvaluator<Expression, GpuDevice>, Index>), num_blocks, block_size, 0, device, evaluator, size); } |