From a407e022e6046917b1ebeacd54b03fcb079a9706 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev
Date: Tue, 5 Mar 2019 14:19:59 -0800
Subject: Tune tensor contraction threadpool heuristics

---
 .../Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index d7cd995fb..adf57c892 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -216,11 +216,14 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
     const int num_worker_threads = this->m_device.numThreadsInPool();
 
     // With small number of threads we want to make sure that we do not reduce
-    // parallelism too much.
-    const int oversharding_factor =
-        num_worker_threads <= 4 ? 8 :
-        num_worker_threads <= 8 ? 4 :
-        num_worker_threads <= 16 ? 2 : 1;
+    // parallelism too much. With large number of threads we trade maximum
+    // parallelism for better memory locality.
+    const float oversharding_factor =
+        num_worker_threads <= 4 ? 8.0 :
+        num_worker_threads <= 8 ? 4.0 :
+        num_worker_threads <= 16 ? 2.0 :
+        num_worker_threads <= 32 ? 1.0 :
+        num_worker_threads <= 64 ? 0.8 : /* num_worker_threads > 64 */ 0.6;
 
     const bool parallelize_by_sharding_dim_only =
         sharding_dim_tasks >= oversharding_factor * num_worker_threads;
-- 
cgit v1.2.3