Add async evaluation support to TensorPadding/TensorImagePatch/TensorShuffling

author: Eugene Zhulenev <ezhulenev@google.com> 2019-11-26 11:41:57 -0800
committer: Eugene Zhulenev <ezhulenev@google.com> 2019-11-26 11:41:57 -0800
commit: bc66c88255a29460fb26dde0a8558db6a3524cd5 (patch)
tree: fb0d883d7abc904db7080b842b34488a6c02a23c
parent: c79b6ffe1fcf8c12005942a1268f79b7d6ecf700 (diff)
4 files changed, 35 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index 722032a3a..e6bb56136 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -176,7 +176,8 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
         /*dst_base=*/m_buffer + desc.offset(),
         /*dst_strides=*/internal::strides<Layout>(m_impl.dimensions()));
 
-    ArgTensorBlock block = m_impl.blockV2(desc, scratch);
+    ArgTensorBlock block =
+        m_impl.blockV2(desc, scratch, /*root_of_expr_ast=*/true);
 
     // If block was evaluated into a destination buffer, there is no need to do
     // an assignment.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
index 49bc60f0a..76bed5526 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
@@ -397,6 +397,14 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
     return true;
   }
 
+#ifdef EIGEN_USE_THREADS
+  template <typename EvalSubExprsCallback>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+      EvaluatorPointerType, EvalSubExprsCallback done) {
+    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+  }
+#endif  // EIGEN_USE_THREADS
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index 4a22922d9..4d1a09ada 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -155,6 +155,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
+
+#ifdef EIGEN_USE_THREADS
+  template <typename EvalSubExprsCallback>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+      EvaluatorPointerType, EvalSubExprsCallback done) {
+    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+  }
+#endif  // EIGEN_USE_THREADS
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 72c43a39d..655fd91e8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -182,6 +182,15 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
+
+#ifdef EIGEN_USE_THREADS
+  template <typename EvalSubExprsCallback>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+      EvaluatorPointerType, EvalSubExprsCallback done) {
+    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+  }
+#endif  // EIGEN_USE_THREADS
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }
@@ -237,10 +246,16 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    static const int inner_dim =
+        Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
+    const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
+
     Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
         1, m_device.firstLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kUniformAllDims, block_total_size_max));
+        inner_dim_shuffled ? internal::kUniformAllDims
+                           : internal::kSkewedInnerDims,
+        block_total_size_max));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
author: Eugene Zhulenev <ezhulenev@google.com> 2019-11-26 11:41:57 -0800
committer: Eugene Zhulenev <ezhulenev@google.com> 2019-11-26 11:41:57 -0800
commit: bc66c88255a29460fb26dde0a8558db6a3524cd5 (patch)
tree: fb0d883d7abc904db7080b842b34488a6c02a23c
parent: c79b6ffe1fcf8c12005942a1268f79b7d6ecf700 (diff)