From ae07801dd8d295657f28b006e1e4999edf835052 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 18 Dec 2019 20:07:00 +0000 Subject: Tensor block evaluation cost model --- .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 1a6891ffd..597ca64cd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -249,14 +249,21 @@ struct TensorEvaluator, Device> static const int inner_dim = Layout == static_cast(ColMajor) ? 0 : NumDims - 1; - const size_t target_block_size = numext::maxi( - 1, m_device.firstLevelCacheSize() / sizeof(Scalar)); - + const size_t target_size = m_device.firstLevelCacheSize(); const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim; - return {inner_dim_shuffled - ? internal::TensorBlockShapeType::kUniformAllDims - : internal::TensorBlockShapeType::kSkewedInnerDims, - target_block_size}; + + // Shuffled inner dimensions lead to random memory access, which is not + // captured by the default cost model's bytes loaded/stored. We add this cost + // explicitly. The number of cycles was picked based on benchmarks. + // TODO(ezhulenev): This number was picked based on very questionable + // benchmarks; add benchmarks that are representative of real workloads. + using BlockRequirements = internal::TensorBlockResourceRequirements; + if (inner_dim_shuffled) { + return BlockRequirements::uniform(target_size) + .addCostPerCoeff({0, 0, NumDims * 28}); + } else { + return BlockRequirements::skewed(target_size); + } } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock -- cgit v1.2.3