From ae07801dd8d295657f28b006e1e4999edf835052 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 18 Dec 2019 20:07:00 +0000 Subject: Tensor block evaluation cost model --- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 23 +++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 146cc325e..d4532b72c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -521,7 +521,9 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const { - return m_argImpl.getResourceRequirements(); + static const double functor_cost = internal::functor_traits::Cost; + return m_argImpl.getResourceRequirements().addCostPerCoeff( + {0, 0, functor_cost / PacketSize}); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock @@ -654,9 +656,11 @@ struct TensorEvaluator::Cost; return internal::TensorBlockResourceRequirements::merge( - m_leftImpl.getResourceRequirements(), - m_rightImpl.getResourceRequirements()); + m_leftImpl.getResourceRequirements(), + m_rightImpl.getResourceRequirements()) + .addCostPerCoeff({0, 0, functor_cost / PacketSize}); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock @@ -934,11 +938,16 @@ struct TensorEvaluator EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const { + auto then_req = m_thenImpl.getResourceRequirements(); + auto else_req = m_elseImpl.getResourceRequirements(); + + auto merged_req = + internal::TensorBlockResourceRequirements::merge(then_req, else_req); + merged_req.cost_per_coeff = + then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff); + return internal::TensorBlockResourceRequirements::merge( - m_condImpl.getResourceRequirements(), - internal::TensorBlockResourceRequirements::merge( - m_thenImpl.getResourceRequirements(), - m_elseImpl.getResourceRequirements())); + m_condImpl.getResourceRequirements(), merged_req); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock -- cgit v1.2.3