From 97c0c5d485ddec0369326825a41db48d8505cf4c Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen
Date: Tue, 22 Oct 2019 12:42:44 -0700
Subject: Add block evaluation V2 to TensorAsyncExecutor. Add async evaluation to a number of ops.

---
 .../Eigen/CXX11/src/Tensor/TensorReduction.h | 45 +++++++++++++++++-----
 1 file changed, 36 insertions(+), 9 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index a5c293cf9..d826cfb7e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -689,15 +689,14 @@ struct TensorReductionEvaluatorBase::HasOptimizedImplementation &&
@@ -802,6 +801,34 @@ struct TensorReductionEvaluatorBase
+  EIGEN_STRONG_INLINE
+#if !defined(EIGEN_HIPCC)
+  EIGEN_DEVICE_FUNC
+#endif
+  void
+  evalSubExprsIfNeededAsync(EvaluatorPointerType data,
+                            EvalSubExprsCallback done) {
+    m_impl.evalSubExprsIfNeededAsync(NULL, [this, data, done](bool) {
+      done(evalSubExprsIfNeededCommon(data));
+    });
+  }
+#endif
+
+  EIGEN_STRONG_INLINE
+#if !defined(EIGEN_HIPCC)
+  // Marking this as EIGEN_DEVICE_FUNC for HIPCC requires also doing the same
+  // for all the functions being called within here, which then leads to
+  // proliferation of EIGEN_DEVICE_FUNC markings, one of which will eventually
+  // result in an NVCC error
+  EIGEN_DEVICE_FUNC
+#endif
+  bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
+    m_impl.evalSubExprsIfNeeded(NULL);
+    return evalSubExprsIfNeededCommon(data);
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
     if (m_result) {
--
cgit v1.2.3