From 7a65219a2eb5c3d8a4b8629082a18c40dc6c332a Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev
Date: Thu, 19 Dec 2019 05:43:57 +0000
Subject: Fix TensorPadding bug in squeezed reads from inner dimension

---
 unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index e070d0b93..561666c6f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -364,9 +364,12 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
     // When possible we squeeze writes for the innermost (only if non-padded)
     // dimension with the first padded dimension. This allows to reduce the
     // number of calls to LinCopy and better utilize vector instructions.
-    const bool squeeze_writes = NumDims > 1 &&
-                                // inner dimension is not padded
-                                input_inner_dim_size == output_inner_dim_size;
+    const bool squeeze_writes =
+        NumDims > 1 &&
+        // inner dimension is not padded
+        (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
+        // and equal to the block inner dimension
+        (input_inner_dim_size == output_inner_dim_size);
 
     const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;
 
-- 
cgit v1.2.3