about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
diff options
context:
space:
mode:
author: Eugene Zhulenev <eugene.zhulenev@gmail.com> 2019-12-19 05:43:57 +0000
committer: Eugene Zhulenev <eugene.zhulenev@gmail.com> 2019-12-19 05:43:57 +0000
commit: 7a65219a2eb5c3d8a4b8629082a18c40dc6c332a (patch)
tree: ac1f3b3b8e4f4117bc7040dc61a0081fb6c70bf5 /unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
parent: 73e55525e50d76874fc3342623bed73490407392 (diff)
Fix TensorPadding bug in squeezed reads from inner dimension
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 9
1 files changed, 6 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index e070d0b93..561666c6f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -364,9 +364,12 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
// When possible we squeeze writes for the innermost (only if non-padded)
// dimension with the first padded dimension. This allows to reduce the
// number of calls to LinCopy and better utilize vector instructions.
- const bool squeeze_writes = NumDims > 1 &&
- // inner dimension is not padded
- input_inner_dim_size == output_inner_dim_size;
+ const bool squeeze_writes =
+ NumDims > 1 &&
+ // inner dimension is not padded
+ (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
+ // and equal to the block inner dimension
+ (input_inner_dim_size == output_inner_dim_size);
const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;