From 2dde1b102866e1928e925678951463f2a7051af1 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 20 Apr 2016 18:08:20 -0700 Subject: Don't crash when attempting to reduce empty tensors. --- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 12 ++++++++++-- unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 7 ++++++- 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'unsupported/Eigen/CXX11') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index bf6e10a7b..c3edae477 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -238,7 +238,7 @@ inline void TensorExecutor::run( device.maxCudaThreadsPerMultiProcessor() / block_size; const Index size = array_prod(evaluator.dimensions()); // Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0. 
- const int num_blocks = numext::maxi(numext::mini(max_blocks, (size + block_size - 1) / block_size), 1); + const int num_blocks = numext::maxi(numext::mini(max_blocks, divup(size, block_size)), 1); LAUNCH_CUDA_KERNEL( (EigenMetaKernel, Index>), diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 6af2d45d4..cd04716bd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -24,9 +24,17 @@ const T2& choose(Cond, const T1&, const T2& second) { return second; } -template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + +template <typename T, typename X, typename Y> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T divup(const X x, const Y y) { + return static_cast<T>((x + y - 1) / y); +} + +template <typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T divup(const T x, const T y) { - return (x + y - 1) / y; + return static_cast<T>((x + y - 1) / y); } template struct max_n_1 { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index afa5a257a..fd2587dd5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -134,9 +134,14 @@ struct FullReducer { typedef typename Self::Index Index; const Index num_coeffs = array_prod(self.m_impl.dimensions()); + // Don't crash when we're called with an input tensor of size 0. + if (num_coeffs == 0) { + return; + } + const int block_size = 256; const int num_per_thread = 128; - const int num_blocks = numext::ceil(static_cast(num_coeffs) / (block_size * num_per_thread)); + const int num_blocks = divup(num_coeffs, block_size * num_per_thread); if (num_blocks > 1) { // We initialize the outputs outside the reduction kernel when we can't be sure that there -- cgit v1.2.3