From 97c0c5d485ddec0369326825a41db48d8505cf4c Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Tue, 22 Oct 2019 12:42:44 -0700 Subject: Add block evaluation V2 to TensorAsyncExecutor. Add async evaluation to a number of ops. --- .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 31 ++++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 7d12e781e..e5b67a18c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -132,14 +132,6 @@ struct TensorEvaluator, Device> EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { const Index numValues = internal::array_prod(m_impl.dimensions()); m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType))); - #ifndef EIGEN_USE_SYCL - // Should initialize the memory in case we're dealing with non POD types. - if (NumTraits::RequireInitialization) { - for (Index i = 0; i < numValues; ++i) { - new(m_buffer+i) CoeffReturnType(); - } - } - #endif typedef TensorEvalToOp< const typename internal::remove_const::type > EvalTo; EvalTo evalToTmp(m_device.get(m_buffer), m_op); @@ -151,6 +143,29 @@ struct TensorEvaluator, Device> return true; } + +#ifdef EIGEN_USE_THREADS + template + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + const Index numValues = internal::array_prod(m_impl.dimensions()); + m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp( + numValues * sizeof(CoeffReturnType))); + typedef TensorEvalToOp::type> + EvalTo; + EvalTo evalToTmp(m_device.get(m_buffer), m_op); + + auto on_done = std::bind([](EvalSubExprsCallback done) { done(true); }, + std::move(done)); + internal::TensorAsyncExecutor< + const EvalTo, typename internal::remove_const::type, + decltype(on_done), + /*Vectorizable=*/internal::IsVectorizable::value, + /*Tiling=*/internal::IsTileable::value>:: + runAsync(evalToTmp, m_device, std::move(on_done)); + } +#endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_device.deallocate_temp(m_buffer); m_buffer = NULL; -- cgit v1.2.3