aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-10-22 12:42:44 -0700
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-10-22 12:42:44 -0700
commit97c0c5d485ddec0369326825a41db48d8505cf4c (patch)
tree9072616f37eacc24f407061ac74954d67da8c5ee /unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
parent668ab3fc474e54c7919eda4fbaf11f3a99246494 (diff)
Add block evaluation V2 to TensorAsyncExecutor.
Add async evaluation to a number of ops.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h31
1 files changed, 23 insertions, 8 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index 7d12e781e..e5b67a18c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -132,14 +132,6 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
const Index numValues = internal::array_prod(m_impl.dimensions());
m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)));
- #ifndef EIGEN_USE_SYCL
- // Should initialize the memory in case we're dealing with non POD types.
- if (NumTraits<CoeffReturnType>::RequireInitialization) {
- for (Index i = 0; i < numValues; ++i) {
- new(m_buffer+i) CoeffReturnType();
- }
- }
- #endif
typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
EvalTo evalToTmp(m_device.get(m_buffer), m_op);
@@ -151,6 +143,29 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
return true;
}
+
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType, EvalSubExprsCallback done) {
+ const Index numValues = internal::array_prod(m_impl.dimensions());
+ m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(
+ numValues * sizeof(CoeffReturnType)));
+ typedef TensorEvalToOp<const typename internal::remove_const<ArgType>::type>
+ EvalTo;
+ EvalTo evalToTmp(m_device.get(m_buffer), m_op);
+
+ auto on_done = std::bind([](EvalSubExprsCallback done) { done(true); },
+ std::move(done));
+ internal::TensorAsyncExecutor<
+ const EvalTo, typename internal::remove_const<Device>::type,
+ decltype(on_done),
+ /*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value,
+ /*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::
+ runAsync(evalToTmp, m_device, std::move(on_done));
+ }
+#endif
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
m_device.deallocate_temp(m_buffer);
m_buffer = NULL;