From 878845cb25c1ba9e56883fd0654eafb55a22fc34 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Fri, 28 Jun 2019 11:13:44 -0700 Subject: Add block access to TensorReverseOp and make sure that TensorForcedEval uses block access when preferred --- .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 416948765..b577d4d36 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -270,6 +270,11 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> input_block_strides[i + 1] * input_block_sizes[i + 1]; } } + DSizes<internal::TensorIntDivisor<Index>, NumDims> fast_input_block_strides; + for (int i = 0; i < NumDims; ++i) { + fast_input_block_strides[i] = + internal::TensorIntDivisor<Index>(input_block_strides[i]); + } // Read input block. TensorBlock input_block(srcCoeff(output_block->first_coeff_index()), @@ -293,8 +298,9 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> continue; } - Index output_index = GetBlockOutputIndex(input_index, input_block_strides, - output_block_strides); + Index output_index = + GetBlockOutputIndex(input_index, input_block_strides, + output_block_strides, fast_input_block_strides); if (output_index == input_index) { // Coefficient already in place. 
bitmap[output_index] = true; @@ -312,8 +318,9 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> data[output_index] = shuffled_value; shuffled_value = evicted_value; bitmap[output_index] = true; - output_index = GetBlockOutputIndex(output_index, input_block_strides, - output_block_strides); + output_index = + GetBlockOutputIndex(output_index, input_block_strides, + output_block_strides, fast_input_block_strides); } while (output_index != input_index); data[output_index] = shuffled_value; @@ -341,11 +348,12 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex( Index input_index, const DSizes<Index, NumDims>& input_block_strides, - const DSizes<Index, NumDims>& output_block_strides) const { + const DSizes<Index, NumDims>& output_block_strides, + const DSizes<internal::TensorIntDivisor<Index>, NumDims>& fast_input_block_strides) const { Index output_index = 0; if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { for (int i = NumDims - 1; i > 0; --i) { - const Index idx = input_index / input_block_strides[i]; + const Index idx = input_index / fast_input_block_strides[i]; output_index += idx * output_block_strides[m_inverseShuffle[i]]; input_index -= idx * input_block_strides[i]; } @@ -353,7 +361,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> output_block_strides[m_inverseShuffle[0]]; } else { for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = input_index / input_block_strides[i]; + const Index idx = input_index / fast_input_block_strides[i]; output_index += idx * output_block_strides[m_inverseShuffle[i]]; input_index -= idx * input_block_strides[i]; } -- cgit v1.2.3