about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com>, 2016-01-14 09:29:48 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com>, 2016-01-14 09:29:48 -0800
commit: 8fe2532e70a8e0261717003d96d4df41ab978756 (patch)
tree: 56cee0a781ade05d15fa5d480c30435a9bac461f /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent: 9f013a9d86ad5cf82939bfeab2223652a821c448 (diff)
Fixed a boundary condition bug in the outer reduction kernel
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 2
1 file changed, 1 insertion, 1 deletion
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 89f055134..54ab34ba1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -241,7 +241,7 @@ __global__ void OuterReductionKernel(Reducer reducer, const Self input, Index nu
}
// Do the reduction.
- const Index max_iter = divup<Index>(num_coeffs_to_reduce, NumPerThread) * num_preserved_coeffs;
+ const Index max_iter = num_preserved_coeffs * numext::maxi<Index>(1, (num_coeffs_to_reduce - NumPerThread + 1));
for (Index i = thread_id; i < max_iter; i += num_threads) {
const Index input_col = i % num_preserved_coeffs;
const Index input_row = (i / num_preserved_coeffs) * NumPerThread;