author    Benoit Steiner <benoit.steiner.goog@gmail.com>  2016-04-20 18:08:20 -0700
committer Benoit Steiner <benoit.steiner.goog@gmail.com>  2016-04-20 18:08:20 -0700
commit    2dde1b102866e1928e925678951463f2a7051af1 (patch)
tree      d6405558903617aa9f5fabe4be8b5601fe6d406a /unsupported/Eigen
parent    a792cd357d31f0a4fce62ed1fa4cc0334cf2f143 (diff)
Don't crash when attempting to reduce empty tensors.
Diffstat (limited to 'unsupported/Eigen'):
 unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h      |  2 +-
 unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h          | 12 ++++++++++--
 unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h |  7 ++++++-
 3 files changed, 17 insertions(+), 4 deletions(-)
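The failure this patch guards against, as a minimal sketch (shown with the default CPU device for brevity; the actual crash occurred on the CUDA path, and the 0 x 10 shape is purely illustrative):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  // A tensor with a zero-sized dimension: array_prod(dimensions()) == 0.
  Eigen::Tensor<float, 2> empty(0, 10);
  // A full reduction over zero coefficients. Before this patch, evaluating
  // such an expression on a GpuDevice could compute a block count of zero
  // and launch a kernel with an invalid configuration.
  Eigen::Tensor<float, 0> sum = empty.sum();
  return 0;
}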
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index bf6e10a7b..c3edae477 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -238,7 +238,7 @@ inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run(
device.maxCudaThreadsPerMultiProcessor() / block_size;
const Index size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash when tensorflow calls with tensors of size 0.
- const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
+ const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size)), 1);
LAUNCH_CUDA_KERNEL(
(EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, Index>),
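The executor change in isolation: divup rounds the block count up, and the surrounding maxi keeps it at 1 even for empty tensors. A standalone sketch, with divup re-declared locally and hypothetical launch parameters:

#include <algorithm>
#include <cstdio>

// Local stand-in for Eigen's divup: integer ceiling division.
template <typename T>
T divup(const T x, const T y) { return (x + y - 1) / y; }

int main() {
  const int block_size = 1024;  // hypothetical CUDA block size
  const int max_blocks = 120;   // hypothetical per-device block budget
  const int sizes[] = {0, 1, 1024, 1025};
  for (const int size : sizes) {
    // Mirrors the patched line: clamp divup(size, block_size) to [1, max_blocks].
    const int num_blocks =
        std::max(std::min(max_blocks, divup(size, block_size)), 1);
    std::printf("size=%5d -> num_blocks=%d\n", size, num_blocks);
  }
  return 0;
}

Note that divup(0, block_size) is 0, so the maxi(..., 1) clamp is what actually prevents a zero-block launch here; replacing the inline expression with divup does not change behavior.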
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
index 6af2d45d4..cd04716bd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -24,9 +24,17 @@ const T2& choose(Cond<false>, const T1&, const T2& second) {
return second;
}
-template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+
+template <typename T, typename X, typename Y>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T divup(const X x, const Y y) {
+ return static_cast<T>((x + y - 1) / y);
+}
+
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T divup(const T x, const T y) {
- return (x + y - 1) / y;
+ return static_cast<T>((x + y - 1) / y);
}
template <size_t n> struct max_n_1 {
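What the new mixed-type overload buys: the sum and division run in the (possibly wider) argument types, and only the result is narrowed to T, rather than forcing the operands through T first. A standalone sketch of both overloads (EIGEN_DEVICE_FUNC / EIGEN_ALWAYS_INLINE dropped, values hypothetical):

#include <cstdint>
#include <cstdio>

// Stand-ins for the two overloads added to TensorMeta.h.
template <typename T, typename X, typename Y>
T divup(const X x, const Y y) {
  // Computed in the common type of X and Y, then narrowed to T.
  return static_cast<T>((x + y - 1) / y);
}

template <typename T>
T divup(const T x, const T y) {
  return static_cast<T>((x + y - 1) / y);
}

int main() {
  const std::int64_t size = 6000000000LL;  // wider than int
  const std::int64_t denom = 32768LL * 128;
  // divup<int>(...) selects the mixed-type overload: the arithmetic happens
  // in 64 bits, so only the final block count has to fit in an int.
  const int blocks = divup<int>(size, denom);
  std::printf("blocks=%d\n", blocks);
  return 0;
}

This matches the call site in TensorExecutor.h, where size is an Index (typically 64-bit) but the kernel launch wants an int block count.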
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index afa5a257a..fd2587dd5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -134,9 +134,14 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
typedef typename Self::Index Index;
const Index num_coeffs = array_prod(self.m_impl.dimensions());
+ // Don't crash when we're called with an input tensor of size 0.
+ if (num_coeffs == 0) {
+ return;
+ }
+
const int block_size = 256;
const int num_per_thread = 128;
- const int num_blocks = numext::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread));
+ const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
if (num_blocks > 1) {
// We initialize the outputs outside the reduction kernel when we can't be sure that there
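Why the reducer swaps numext::ceil for divup, as a standalone sketch (block_size and num_per_thread take the values from the patch; the 2^24 + 1 input is chosen to expose float rounding):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Stand-in for Eigen's divup: exact integer ceiling division.
template <typename T, typename X, typename Y>
T divup(const X x, const Y y) { return static_cast<T>((x + y - 1) / y); }

int main() {
  const int block_size = 256;
  const int num_per_thread = 128;
  const std::int64_t coeffs[] = {0, (std::int64_t(1) << 24) + 1};
  for (const std::int64_t num_coeffs : coeffs) {
    // Old formula: the cast to float rounds 2^24 + 1 down to 2^24, so the
    // ceiling is taken of the wrong quotient.
    const int old_blocks = static_cast<int>(std::ceil(
        static_cast<float>(num_coeffs) / (block_size * num_per_thread)));
    // New formula: exact in integer arithmetic.
    const int new_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
    std::printf("num_coeffs=%lld old=%d new=%d\n",
                static_cast<long long>(num_coeffs), old_blocks, new_blocks);
  }
  // Both formulas yield 0 blocks when num_coeffs == 0, which is why the patch
  // also returns early before any kernel launch for empty inputs.
  return 0;
}

For 2^24 + 1 coefficients this prints old=512, new=513: the float version comes up one block short.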