From 8491127082e5f6568983255a459ca737271aaf3f Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 4 Feb 2019 12:59:33 -0800 Subject: Do not reduce parallelism too much in contractions with small number of threads --- .../Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index 4932514c7..4af8d3b18 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -339,10 +339,19 @@ struct TensorEvaluator<...> [...] - if (!parallel_pack_ && sharding_dim_tasks >= device_.numThreadsInPool()) { - parallelize_by_sharding_dim_only_ = true; - int num_worker_threads = device_.numThreadsInPool(); + const int num_worker_threads = device_.numThreadsInPool(); + + // With small number of threads we want to make sure that we do not reduce + // parallelism too much. + const int oversharding_factor = + num_worker_threads <= 4 ? 8 : + num_worker_threads <= 8 ? 4 : + num_worker_threads <= 16 ? 2 : 1; + + if (!parallel_pack_ && + sharding_dim_tasks >= oversharding_factor * num_worker_threads) { + parallelize_by_sharding_dim_only_ = true; if (shard_by_col) { can_use_thread_local_packed_ = new std::atomic<bool>[nn_]; -- cgit v1.2.3