diff options
author | Eugene Zhulenev <eugene.zhulenev@gmail.com> | 2019-12-18 20:07:00 +0000 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-12-18 20:07:00 +0000 |
commit | ae07801dd8d295657f28b006e1e4999edf835052 (patch) | |
tree | 08a91a4368c15d365127344f920bd10f8e437db2 /unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | |
parent | 72166d0e6eaf12a99f449e26f402f926bef2bb50 (diff) |
Tensor block evaluation cost model
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 8 |
1 files changed, 3 insertions, 5 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 620c8741c..3408f90d1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -620,12 +620,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
     // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
     // tensors. But this might need further tuning.
-    const size_t target_block_size = numext::maxi<size_t>(
-        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-
+    const size_t target_size = m_device.firstLevelCacheSize();
     return internal::TensorBlockResourceRequirements::merge(
-        {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
-        m_impl.getResourceRequirements());
+        m_impl.getResourceRequirements(),
+        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
```