Re-enabled the optimized reduction CUDA code.

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-01-11 09:07:14 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-01-11 09:07:14 -0800
commit: 780623261eedd996404795dfb7928e680408adb5 (patch)
tree: 14f995a2491d2dead393ad75914310bb0ce5f8a1 /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent: b557662e589a76265f73b99d7ca54a988d7bdb59 (diff)
1 file changed, 3 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 558d0c83d..198b3604c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -126,7 +126,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
     const int block_size = 256;
     const int num_per_thread = 128;
     const int num_blocks = std::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread));
-    LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread>),
+    LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>),
                        num_blocks, block_size, 0, device, reducer, self, num_coeffs, output);
   }
 };
@@ -222,7 +222,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
     const int num_per_thread = 128;
     const int num_blocks = 32;
 
-    LAUNCH_CUDA_KERNEL((InnerReductionKernel<num_per_thread>),
+    LAUNCH_CUDA_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>),
                        num_blocks, block_size, block_size*sizeof(float), device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
   }
 };
@@ -279,7 +279,7 @@ struct OuterReducer<Self, Op, GpuDevice> {
                            device.maxCudaThreadsPerMultiProcessor() / block_size;
     const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
 
-    LAUNCH_CUDA_KERNEL((OuterReductionKernel<num_per_thread>),
+    LAUNCH_CUDA_KERNEL((OuterReductionKernel<num_per_thread, Self, Op, Index>),
                        num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
   }
 };
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-01-11 09:07:14 -0800
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-01-11 09:07:14 -0800
commit	780623261eedd996404795dfb7928e680408adb5 (patch)
tree	14f995a2491d2dead393ad75914310bb0ce5f8a1 /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent	b557662e589a76265f73b99d7ca54a988d7bdb59 (diff)