aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
diff options
context:
space:
mode:
authorGravatar Eugene Zhulenev <ezhulenev@google.com>2019-03-05 14:19:59 -0800
committerGravatar Eugene Zhulenev <ezhulenev@google.com>2019-03-05 14:19:59 -0800
commita407e022e6046917b1ebeacd54b03fcb079a9706 (patch)
tree97aa14a83c888f1e84880084111c672b999d9bc8 /unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
parent56c6373f82d4cf42a489951fb08566d3e5b612ef (diff)
Tune tensor contraction threadpool heuristics
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h13
1 files changed, 8 insertions, 5 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index d7cd995fb..adf57c892 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -216,11 +216,14 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
const int num_worker_threads = this->m_device.numThreadsInPool();
// With small number of threads we want to make sure that we do not reduce
- // parallelism too much.
- const int oversharding_factor =
- num_worker_threads <= 4 ? 8 :
- num_worker_threads <= 8 ? 4 :
- num_worker_threads <= 16 ? 2 : 1;
+ // parallelism too much. With large number of threads we trade maximum
+ // parallelism for better memory locality.
+ const float oversharding_factor =
+ num_worker_threads <= 4 ? 8.0 :
+ num_worker_threads <= 8 ? 4.0 :
+ num_worker_threads <= 16 ? 2.0 :
+ num_worker_threads <= 32 ? 1.0 :
+ num_worker_threads <= 64 ? 0.8 : /* num_worker_threads > 64 */ 0.6;
const bool parallelize_by_sharding_dim_only =
sharding_dim_tasks >= oversharding_factor * num_worker_threads;