aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
diff options
context:
space:
mode:
authorGravatar Deven Desai <deven.desai.amd@gmail.com>2019-03-19 16:52:38 -0400
committerGravatar Deven Desai <deven.desai.amd@gmail.com>2019-03-19 16:52:38 -0400
commit2dbea5510fe5cb64dbfdef9042c04a3a92b87f76 (patch)
treec187e7ec5e90a191e19466ff6084dd8f053dba7e /unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
parente7e6809e6b38a5928efc0b5ca9520258e4d1fb3a (diff)
parent5c93b38c5fca514a08084e32feb8a8fb27bf3665 (diff)
Merged eigen/eigen into default
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h10
1 files changed, 8 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index bb330a77b..b43db40c8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -87,13 +87,13 @@ struct ThreadPoolDevice {
const size_t kMinBlockSize = 32768;
typedef TensorCostModel<ThreadPoolDevice> CostModel;
const size_t num_threads = CostModel::numThreads(n, TensorOpCost(1.0, 1.0, 0), 4);
- if (n <= kMinBlockSize || num_threads == 1) {
+ if (n <= kMinBlockSize || num_threads < 2) {
::memcpy(dst, src, n);
} else {
const char* src_ptr = static_cast<const char*>(src);
char* dst_ptr = static_cast<char*>(dst);
const size_t blocksize = (n + (num_threads - 1)) / num_threads;
- Barrier barrier(num_threads - 1);
+ Barrier barrier(static_cast<int>(num_threads - 1));
// Launch the last 3 blocks on worker threads.
for (size_t i = 1; i < num_threads; ++i) {
enqueue_with_barrier(&barrier, [n, i, src_ptr, dst_ptr, blocksize] {
@@ -122,6 +122,12 @@ struct ThreadPoolDevice {
return num_threads_;
}
+ // Number of threads available in the underlying thread pool. This number can
+ // be different from the value returned by numThreads().
+ EIGEN_STRONG_INLINE int numThreadsInPool() const {
+ return pool_->NumThreads();
+ }
+
EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
return l1CacheSize();
}