diff options
Diffstat (limited to 'Eigen/src/Core/products/Parallelizer.h')
-rw-r--r-- | Eigen/src/Core/products/Parallelizer.h | 19 |
1 files changed, 11 insertions, 8 deletions
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index 2b90abf8f..91d37a123 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -120,25 +120,28 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos return func(0,rows, 0,cols); Eigen::initParallel(); - func.initParallelSession(); + func.initParallelSession(threads); if(transpose) std::swap(rows,cols); - - Index blockCols = (cols / threads) & ~Index(0x3); - Index blockRows = (rows / threads); - blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr; ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0); - + #pragma omp parallel num_threads(threads) { Index i = omp_get_thread_num(); + // Note that the actual number of threads might be lower than the number of request ones. + Index actual_threads = omp_get_num_threads(); + + Index blockCols = (cols / actual_threads) & ~Index(0x3); + Index blockRows = (rows / actual_threads); + blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr; + Index r0 = i*blockRows; - Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows; + Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows; Index c0 = i*blockCols; - Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols; + Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols; info[i].lhs_start = r0; info[i].lhs_length = actualBlockRows; |