From eb21bab769b11546d08f7db0b5bb78bfde6cdbae Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 4 Feb 2019 10:43:16 -0800 Subject: Parallelize tensor contraction only by sharding dimension and use 'thread-local' memory for packing --- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index e03735611..fb34cd75e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -122,6 +122,12 @@ struct ThreadPoolDevice { return num_threads_; } + // Number of threads available in the underlying thread pool. This number can + // be different from the value returned by numThreads(). + EIGEN_STRONG_INLINE int numThreadsInPool() const { + return pool_->NumThreads(); + } + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { return l1CacheSize(); } -- cgit v1.2.3