diff options
author | Deven Desai <deven.desai.amd@gmail.com> | 2019-03-19 16:52:38 -0400 |
---|---|---|
committer | Deven Desai <deven.desai.amd@gmail.com> | 2019-03-19 16:52:38 -0400 |
commit | 2dbea5510fe5cb64dbfdef9042c04a3a92b87f76 (patch) | |
tree | c187e7ec5e90a191e19466ff6084dd8f053dba7e /unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | |
parent | e7e6809e6b38a5928efc0b5ca9520258e4d1fb3a (diff) | |
parent | 5c93b38c5fca514a08084e32feb8a8fb27bf3665 (diff) |
Merged eigen/eigen into default
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index bb330a77b..b43db40c8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -87,13 +87,13 @@ struct ThreadPoolDevice { const size_t kMinBlockSize = 32768; typedef TensorCostModel<ThreadPoolDevice> CostModel; const size_t num_threads = CostModel::numThreads(n, TensorOpCost(1.0, 1.0, 0), 4); - if (n <= kMinBlockSize || num_threads == 1) { + if (n <= kMinBlockSize || num_threads < 2) { ::memcpy(dst, src, n); } else { const char* src_ptr = static_cast<const char*>(src); char* dst_ptr = static_cast<char*>(dst); const size_t blocksize = (n + (num_threads - 1)) / num_threads; - Barrier barrier(num_threads - 1); + Barrier barrier(static_cast<int>(num_threads - 1)); // Launch the last 3 blocks on worker threads. for (size_t i = 1; i < num_threads; ++i) { enqueue_with_barrier(&barrier, [n, i, src_ptr, dst_ptr, blocksize] { @@ -122,6 +122,12 @@ struct ThreadPoolDevice { return num_threads_; } + // Number of threads available in the underlying thread pool. This number can + // be different from the value returned by numThreads(). + EIGEN_STRONG_INLINE int numThreadsInPool() const { + return pool_->NumThreads(); + } + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { return l1CacheSize(); } |