diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 558d0c83d..198b3604c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -126,7 +126,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> { const int block_size = 256; const int num_per_thread = 128; const int num_blocks = std::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread)); - LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread>), + LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>), num_blocks, block_size, 0, device, reducer, self, num_coeffs, output); } }; @@ -222,7 +222,7 @@ struct InnerReducer<Self, Op, GpuDevice> { const int num_per_thread = 128; const int num_blocks = 32; - LAUNCH_CUDA_KERNEL((InnerReductionKernel<num_per_thread>), + LAUNCH_CUDA_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>), num_blocks, block_size, block_size*sizeof(float), device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); } }; @@ -279,7 +279,7 @@ struct OuterReducer<Self, Op, GpuDevice> { device.maxCudaThreadsPerMultiProcessor() / block_size; const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); - LAUNCH_CUDA_KERNEL((OuterReductionKernel<num_per_thread>), + LAUNCH_CUDA_KERNEL((OuterReductionKernel<num_per_thread, Self, Op, Index>), num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); } }; |