Block evaluation for TensorChipping + fixed bugs in TensorPadding and TensorSlicing

author: Eugene Zhulenev <ezhulenev@google.com> 2019-10-09 12:45:31 -0700
committer: Eugene Zhulenev <ezhulenev@google.com> 2019-10-09 12:45:31 -0700
commit: 33e174613987cfc6c83576dc0fe8086c7a5d1b1f (patch)
tree: 4f4c62eab5c0feca0f233624c9c1fc571c491781 /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
parent: f0a4642baba70a64128964d96c4ede012614925e (diff)
1 files changed, 13 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 97ac96db1..6ad6327a6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -521,6 +521,19 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
   static EIGEN_STRONG_INLINE void run(const Expression& expr,
                                       const ThreadPoolDevice& device) {
     Evaluator evaluator(expr, device);
+    Index total_size = array_prod(evaluator.dimensions());
+    Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
+
+    // TODO(ezhulenev): For small expressions, the cost of block mapping and
+    // resource requirements gathering dominates the cost of expression
+    // evaluation.
+    if (total_size < cache_size &&
+        !ExpressionHasTensorBroadcastingOp<Expression>::value) {
+      internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
+                               /*Tiling=*/TiledEvaluation::Off>::run(expr, device);
+      evaluator.cleanup();
+      return;
+    }
 
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr);
     if (needs_assign) {
author	Eugene Zhulenev <ezhulenev@google.com>	2019-10-09 12:45:31 -0700
committer	Eugene Zhulenev <ezhulenev@google.com>	2019-10-09 12:45:31 -0700
commit	33e174613987cfc6c83576dc0fe8086c7a5d1b1f (patch)
tree	4f4c62eab5c0feca0f233624c9c1fc571c491781 /unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
parent	f0a4642baba70a64128964d96c4ede012614925e (diff)