aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
diff options
context:
space:
mode:
authorGravatar Eugene Zhulenev <ezhulenev@google.com>2019-06-28 11:13:44 -0700
committerGravatar Eugene Zhulenev <ezhulenev@google.com>2019-06-28 11:13:44 -0700
commit878845cb25c1ba9e56883fd0654eafb55a22fc34 (patch)
tree848fdcee1dc377feee2ef45495b3ad21839d0244 /unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
parent16a56b2dddbfaf2d4b81d62be5e3139f12783ac8 (diff)
Add block access to TensorReverseOp and make sure that TensorForcedEval uses block access when preferred
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h22
1 files changed, 15 insertions, 7 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 416948765..b577d4d36 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -270,6 +270,11 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
input_block_strides[i + 1] * input_block_sizes[i + 1];
}
}
+ DSizes<internal::TensorIntDivisor<Index>, NumDims> fast_input_block_strides;
+ for (int i = 0; i < NumDims; ++i) {
+ fast_input_block_strides[i] =
+ internal::TensorIntDivisor<Index>(input_block_strides[i]);
+ }
// Read input block.
TensorBlock input_block(srcCoeff(output_block->first_coeff_index()),
@@ -293,8 +298,9 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
continue;
}
- Index output_index = GetBlockOutputIndex(input_index, input_block_strides,
- output_block_strides);
+ Index output_index =
+ GetBlockOutputIndex(input_index, input_block_strides,
+ output_block_strides, fast_input_block_strides);
if (output_index == input_index) {
// Coefficient already in place.
bitmap[output_index] = true;
@@ -312,8 +318,9 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
data[output_index] = shuffled_value;
shuffled_value = evicted_value;
bitmap[output_index] = true;
- output_index = GetBlockOutputIndex(output_index, input_block_strides,
- output_block_strides);
+ output_index =
+ GetBlockOutputIndex(output_index, input_block_strides,
+ output_block_strides, fast_input_block_strides);
} while (output_index != input_index);
data[output_index] = shuffled_value;
@@ -341,11 +348,12 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(
Index input_index,
const DSizes<Index, NumDims>& input_block_strides,
- const DSizes<Index, NumDims>& output_block_strides) const {
+ const DSizes<Index, NumDims>& output_block_strides,
+ const DSizes<internal::TensorIntDivisor<Index>, NumDims>& fast_input_block_strides) const {
Index output_index = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = input_index / input_block_strides[i];
+ const Index idx = input_index / fast_input_block_strides[i];
output_index += idx * output_block_strides[m_inverseShuffle[i]];
input_index -= idx * input_block_strides[i];
}
@@ -353,7 +361,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
output_block_strides[m_inverseShuffle[0]];
} else {
for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx = input_index / input_block_strides[i];
+ const Index idx = input_index / fast_input_block_strides[i];
output_index += idx * output_block_strides[m_inverseShuffle[i]];
input_index -= idx * input_block_strides[i];
}