aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h13
1 files changed, 13 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 97ac96db1..6ad6327a6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -521,6 +521,19 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
static EIGEN_STRONG_INLINE void run(const Expression& expr,
const ThreadPoolDevice& device) {
Evaluator evaluator(expr, device);
+ Index total_size = array_prod(evaluator.dimensions());
+ Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
+
+ // TODO(ezuhulenev): For small expressions, the cost of block mapping and
+ // resource requirements gathering dominates the cost of expression
+ // evaluation.
+ if (total_size < cache_size &&
+ !ExpressionHasTensorBroadcastingOp<Expression>::value) {
+ internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
+ /*Tiling=*/TiledEvaluation::Off>::run(expr, device);
+ evaluator.cleanup();
+ return;
+ }
const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr);
if (needs_assign) {