From 8fe2532e70a8e0261717003d96d4df41ab978756 Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Thu, 14 Jan 2016 09:29:48 -0800
Subject: Fixed a boundary condition bug in the outer reduction kernel

---
 unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 89f055134..54ab34ba1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -241,7 +241,7 @@ __global__ void OuterReductionKernel(Reducer reducer, const Self input, Index nu
   }
 
   // Do the reduction.
-  const Index max_iter = divup(num_coeffs_to_reduce, NumPerThread) * num_preserved_coeffs;
+  const Index max_iter = num_preserved_coeffs * numext::maxi(1, (num_coeffs_to_reduce - NumPerThread + 1));
   for (Index i = thread_id; i < max_iter; i += num_threads) {
     const Index input_col = i % num_preserved_coeffs;
     const Index input_row = (i / num_preserved_coeffs) * NumPerThread;
-- 
cgit v1.2.3