diff options
Diffstat (limited to 'Eigen/src/Core/products/Parallelizer.h')
-rw-r--r-- | Eigen/src/Core/products/Parallelizer.h | 25 |
1 files changed, 12 insertions, 13 deletions
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index 5c3e9b7ac..4079063eb 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -73,13 +73,13 @@ namespace internal { template<typename Index> struct GemmParallelInfo { - GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {} + GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {} int volatile sync; int volatile users; - Index rhs_start; - Index rhs_length; + Index lhs_start; + Index lhs_length; }; template<bool Condition, typename Functor, typename Index> @@ -107,7 +107,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos if((!Condition) || (omp_get_num_threads()>1)) return func(0,rows, 0,cols); - Index size = transpose ? cols : rows; + Index size = transpose ? rows : cols; // 2- compute the maximal number of threads from the size of the product: // FIXME this has to be fine tuned @@ -126,26 +126,25 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos std::swap(rows,cols); Index blockCols = (cols / threads) & ~Index(0x3); - Index blockRows = (rows / threads) & ~Index(0x7); + Index blockRows = (rows / threads); + blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr; GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads]; - #pragma omp parallel for schedule(static,1) num_threads(threads) - for(Index i=0; i<threads; ++i) + #pragma omp parallel num_threads(threads) { + Index i = omp_get_thread_num(); Index r0 = i*blockRows; Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows; Index c0 = i*blockCols; Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols; - info[i].rhs_start = c0; - info[i].rhs_length = actualBlockCols; + info[i].lhs_start = r0; + info[i].lhs_length = actualBlockRows; - if(transpose) - func(0, cols, r0, actualBlockRows, info); - else - func(r0, actualBlockRows, 0,cols, info); + if(transpose) func(c0, actualBlockCols, 0, rows, info); + else func(0, rows, c0, actualBlockCols, info); } delete[] info; |