-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h | 12 ++++++------
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h      | 10 +++++-----
2 files changed, 11 insertions, 11 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index a60a17049..ee16cde9b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -202,7 +202,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
     // across k dimension.
     const TensorOpCost cost =
         contractionCost(m, n, bm, bn, bk, shard_by_col, false);
-    Index num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
+    int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
         static_cast<double>(n) * m, cost, this->m_device.numThreads());
 
     // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost
@@ -301,7 +301,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
   class Context {
    public:
     Context(const Device& device, int num_threads, LhsMapper& lhs,
-            RhsMapper& rhs, Scalar* buffer, Index m, Index n, Index k, Index bm,
+            RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm,
             Index bn, Index bk, Index nm, Index nn, Index nk, Index gm,
             Index gn, Index nm0, Index nn0, bool shard_by_col,
             bool parallel_pack)
@@ -309,13 +309,13 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
           lhs_(lhs),
           rhs_(rhs),
           buffer_(buffer),
-          output_(buffer, m),
+          output_(buffer, tm),
           num_threads_(num_threads),
           shard_by_col_(shard_by_col),
           parallel_pack_(parallel_pack),
-          m_(m),
-          n_(n),
-          k_(k),
+          m_(tm),
+          n_(tn),
+          k_(tk),
           bm_(bm),
           bn_(bn),
           bk_(bk),
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index d31b0ad38..c770d024f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -106,7 +106,7 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
 // Build a thread pool device on top the an existing pool of threads.
 struct ThreadPoolDevice {
   // The ownership of the thread pool remains with the caller.
-  ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { }
+  ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { }
 
   EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
     return internal::aligned_malloc(num_bytes);
@@ -130,7 +130,7 @@ struct ThreadPoolDevice {
     ::memset(buffer, c, n);
   }
 
-  EIGEN_STRONG_INLINE size_t numThreads() const {
+  EIGEN_STRONG_INLINE int numThreads() const {
     return num_threads_;
   }
 
@@ -182,7 +182,7 @@ struct ThreadPoolDevice {
                    std::function<void(Index, Index)> f) const {
     typedef TensorCostModel<ThreadPoolDevice> CostModel;
     if (n <= 1 || numThreads() == 1 ||
-        CostModel::numThreads(n, cost, numThreads()) == 1) {
+        CostModel::numThreads(n, cost, static_cast<int>(numThreads())) == 1) {
       f(0, n);
       return;
     }
@@ -242,7 +242,7 @@ struct ThreadPoolDevice {
     // Recursively divide size into halves until we reach block_size.
     // Division code rounds mid to block_size, so we are guaranteed to get
     // block_count leaves that do actual computations.
-    Barrier barrier(block_count);
+    Barrier barrier(static_cast<unsigned int>(block_count));
     std::function<void(Index, Index)> handleRange;
     handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) {
       if (last - first <= block_size) {
@@ -268,7 +268,7 @@ struct ThreadPoolDevice {
 
  private:
   ThreadPoolInterface* pool_;
-  size_t num_threads_;
+  int num_threads_;
 };
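The net effect of the patch is that thread counts flow through ThreadPoolDevice, TensorCostModel<ThreadPoolDevice>::numThreads(), and the contraction Context consistently as int (with an explicit unsigned cast at the Barrier boundary) instead of mixing size_t and Index. A minimal usage sketch of the changed constructor and accessor follows; it is illustrative only and assumes an Eigen build where defining EIGEN_USE_THREADS pulls Eigen::ThreadPool and Eigen::ThreadPoolDevice in through the Tensor header:

// Illustrative sketch, not part of the patch.
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  // The core count is now plain int (previously size_t).
  const int num_cores = 4;
  // Eigen::ThreadPool implements ThreadPoolInterface; ownership of the
  // pool stays with the caller, as the comment in the header notes.
  Eigen::ThreadPool pool(num_cores);
  Eigen::ThreadPoolDevice device(&pool, num_cores);

  // numThreads() now returns int as well, so it lines up with the
  // int-typed thread counts used by the tensor cost model.
  return device.numThreads() == num_cores ? 0 : 1;
}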