diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-10-30 17:49:39 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-10-30 17:49:39 -0700 |
commit | 5e62427e22002019d1a3ef05daeb75c6db7c6405 (patch) | |
tree | 3215f032495732604331c5818f70bd9e3c8c2219 /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | |
parent | debc97821c775518afd54e05e19dec9eb0c3bde1 (diff) |
Use the proper index type
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 20 |
1 files changed, 10 insertions, 10 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 01fa04c64..4fa8e83ef 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -149,26 +149,26 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
 // GPU: the evaluation of the expression is offloaded to a GPU.
 #if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-template <typename Evaluator>
+template <typename Evaluator, typename Index>
 __global__ void __launch_bounds__(1024)
-EigenMetaKernel(Evaluator eval, unsigned int size) {
+  EigenMetaKernel(Evaluator eval, Index size) {
 
-  const int first_index = blockIdx.x * blockDim.x + threadIdx.x;
-  const int step_size = blockDim.x * gridDim.x;
+  const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
+  const Index step_size = blockDim.x * gridDim.x;
 
   if (!Evaluator::PacketAccess || !Evaluator::IsAligned) {
     // Use the scalar path
-    for (int i = first_index; i < size; i += step_size) {
+    for (Index i = first_index; i < size; i += step_size) {
       eval.evalScalar(i);
     }
   }
   else {
     // Use the vector path
-    const int PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
-    const int vectorized_step_size = step_size * PacketSize;
-    const int vectorized_size = (size / PacketSize) * PacketSize;
-    int i = first_index * PacketSize;
+    const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
+    const Index vectorized_step_size = step_size * PacketSize;
+    const Index vectorized_size = (size / PacketSize) * PacketSize;
+    Index i = first_index * PacketSize;
     for ( ; i < vectorized_size; i += vectorized_step_size) {
       eval.evalPacket(i);
     }
@@ -193,7 +193,7 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable>
     const int block_size = maxCudaThreadsPerBlock();
     const Index size = array_prod(evaluator.dimensions());
-    EigenMetaKernel<TensorEvaluator<Expression, GpuDevice> ><<<num_blocks, block_size, 0, device.stream()>>>(evaluator, size);
+    EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, Index><<<num_blocks, block_size, 0, device.stream()>>>(evaluator, size);
     assert(cudaGetLastError() == cudaSuccess);
   }
   evaluator.cleanup();