diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-10-22 12:42:44 -0700 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-10-22 12:42:44 -0700 |
commit | 97c0c5d485ddec0369326825a41db48d8505cf4c (patch) | |
tree | 9072616f37eacc24f407061ac74954d67da8c5ee /unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h | |
parent | 668ab3fc474e54c7919eda4fbaf11f3a99246494 (diff) |
Add block evaluation V2 to TensorAsyncExecutor.
Add async evaluation to a number of ops.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h | 31 |
1 file changed, 23 insertions, 8 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 7d12e781e..e5b67a18c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -132,14 +132,6 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device> EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { const Index numValues = internal::array_prod(m_impl.dimensions()); m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType))); - #ifndef EIGEN_USE_SYCL - // Should initialize the memory in case we're dealing with non POD types. - if (NumTraits<CoeffReturnType>::RequireInitialization) { - for (Index i = 0; i < numValues; ++i) { - new(m_buffer+i) CoeffReturnType(); - } - } - #endif typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo; EvalTo evalToTmp(m_device.get(m_buffer), m_op); @@ -151,6 +143,29 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device> return true; } + +#ifdef EIGEN_USE_THREADS + template <typename EvalSubExprsCallback> + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void evalSubExprsIfNeededAsync( + EvaluatorPointerType, EvalSubExprsCallback done) { + const Index numValues = internal::array_prod(m_impl.dimensions()); + m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp( + numValues * sizeof(CoeffReturnType))); + typedef TensorEvalToOp<const typename internal::remove_const<ArgType>::type> + EvalTo; + EvalTo evalToTmp(m_device.get(m_buffer), m_op); + + auto on_done = std::bind([](EvalSubExprsCallback done) { done(true); }, + std::move(done)); + internal::TensorAsyncExecutor< + const EvalTo, typename internal::remove_const<Device>::type, + decltype(on_done), + /*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value, + /*Tiling=*/internal::IsTileable<Device, const ArgType>::value>:: + runAsync(evalToTmp, m_device, 
std::move(on_done)); + } +#endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_device.deallocate_temp(m_buffer); m_buffer = NULL; |