aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
diff options
context:
space:
mode:
authorGravatar Eugene Zhulenev <ezhulenev@google.com>2019-08-30 15:13:38 -0700
committerGravatar Eugene Zhulenev <ezhulenev@google.com>2019-08-30 15:13:38 -0700
commitf0b36fb9a405400e82b73ea70097b8ae3cd1095a (patch)
treed3a2903422799257720d2d4989bcd845ab2ae27e /unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
parent619cea94916e7531a839ee0ff657714857921db8 (diff)
evalSubExprsIfNeededAsync + async TensorContractionThreadPool
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h51
1 files changed, 50 insertions, 1 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index a3a79d4e9..fec735868 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -79,7 +79,16 @@ struct TensorEvaluator
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType dest, EvalSubExprsCallback done) {
+ // TODO(ezhulenev): ThreadPoolDevice memcpy is blockign operation.
+ done(evalSubExprsIfNeeded(dest));
+ }
+#endif // EIGEN_USE_THREADS
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
eigen_assert(m_data != NULL);
@@ -247,6 +256,15 @@ struct TensorEvaluator<const Derived, Device>
return true;
}
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType dest, EvalSubExprsCallback done) {
+ // TODO(ezhulenev): ThreadPoolDevice memcpy is a blockign operation.
+ done(evalSubExprsIfNeeded(dest));
+ }
+#endif // EIGEN_USE_THREADS
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
@@ -346,6 +364,15 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
+
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType, EvalSubExprsCallback done) {
+ done(true);
+ }
+#endif // EIGEN_USE_THREADS
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
@@ -425,6 +452,15 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
m_argImpl.evalSubExprsIfNeeded(NULL);
return true;
}
+
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType, EvalSubExprsCallback done) {
+ m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+ }
+#endif // EIGEN_USE_THREADS
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
m_argImpl.cleanup();
}
@@ -546,6 +582,19 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
m_rightImpl.evalSubExprsIfNeeded(NULL);
return true;
}
+
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType, EvalSubExprsCallback done) {
+ // TODO(ezhulenev): Evaluate two expression in parallel?
+ m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
+ m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
+ [done](bool) { done(true); });
+ });
+ }
+#endif // EIGEN_USE_THREADS
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
m_leftImpl.cleanup();
m_rightImpl.cleanup();