diff options
author | Eugene Zhulenev <eugene.zhulenev@gmail.com> | 2019-12-18 20:07:00 +0000 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-12-18 20:07:00 +0000 |
commit | ae07801dd8d295657f28b006e1e4999edf835052 (patch) | |
tree | 08a91a4368c15d365127344f920bd10f8e437db2 /unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h | |
parent | 72166d0e6eaf12a99f449e26f402f926bef2bb50 (diff) |
Tensor block evaluation cost model
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h | 21 |
1 files changed, 14 insertions, 7 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 1a6891ffd..597ca64cd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -249,14 +249,21 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     static const int inner_dim =
         Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
 
-    const size_t target_block_size = numext::maxi<size_t>(
-        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-
+    const size_t target_size = m_device.firstLevelCacheSize();
     const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
-    return {inner_dim_shuffled
-                ? internal::TensorBlockShapeType::kUniformAllDims
-                : internal::TensorBlockShapeType::kSkewedInnerDims,
-            target_block_size};
+
+    // Shuffled inner dimensions leads to a random memory access, which is not
+    // captured by default cost model bytes loaded/stored. We add this cost
+    // explicitly. The number of cycles picked based on the benchmarks.
+    // TODO(ezhulenev): This number was picked based on a very questionable
+    // benchmarks, add benchmarks that are representative of real workloads.
+    using BlockRequirements = internal::TensorBlockResourceRequirements;
+    if (inner_dim_shuffled) {
+      return BlockRequirements::uniform<Scalar>(target_size)
+          .addCostPerCoeff({0, 0, NumDims * 28});
+    } else {
+      return BlockRequirements::skewed<Scalar>(target_size);
+    }
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
```