From ae07801dd8d295657f28b006e1e4999edf835052 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev
Date: Wed, 18 Dec 2019 20:07:00 +0000
Subject: Tensor block evaluation cost model

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 620c8741c..3408f90d1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -620,12 +620,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
     // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
     // tensors. But this might need further tuning.
-    const size_t target_block_size = numext::maxi<size_t>(
-        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-
+    const size_t target_size = m_device.firstLevelCacheSize();
     return internal::TensorBlockResourceRequirements::merge(
-        {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
-        m_impl.getResourceRequirements());
+        m_impl.getResourceRequirements(),
+        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
-- 
cgit v1.2.3