Don't crash when attempting to reduce empty tensors.

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-20 18:08:20 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-20 18:08:20 -0700
commit: 2dde1b102866e1928e925678951463f2a7051af1 (patch)
tree: d6405558903617aa9f5fabe4be8b5601fe6d406a /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent: a792cd357d31f0a4fce62ed1fa4cc0334cf2f143 (diff)
1 file changed, 6 insertions, 1 deletion
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index afa5a257a..fd2587dd5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -134,9 +134,14 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
     typedef typename Self::Index Index;
 
     const Index num_coeffs = array_prod(self.m_impl.dimensions());
+    // Don't crash when we're called with an input tensor of size 0.
+    if (num_coeffs == 0) {
+      return;
+    }
+
     const int block_size = 256;
     const int num_per_thread = 128;
-    const int num_blocks = numext::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread));
+    const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
 
     if (num_blocks > 1) {
       // We initialize the outputs outside the reduction kernel when we can't be sure that there
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-04-20 18:08:20 -0700
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-04-20 18:08:20 -0700
commit	2dde1b102866e1928e925678951463f2a7051af1 (patch)
tree	d6405558903617aa9f5fabe4be8b5601fe6d406a /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent	a792cd357d31f0a4fce62ed1fa4cc0334cf2f143 (diff)