aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products/Parallelizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/products/Parallelizer.h')
-rw-r--r--Eigen/src/Core/products/Parallelizer.h25
1 files changed, 12 insertions, 13 deletions
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index 5c3e9b7ac..4079063eb 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -73,13 +73,13 @@ namespace internal {
template<typename Index> struct GemmParallelInfo
{
- GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
+ GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
int volatile sync;
int volatile users;
- Index rhs_start;
- Index rhs_length;
+ Index lhs_start;
+ Index lhs_length;
};
template<bool Condition, typename Functor, typename Index>
@@ -107,7 +107,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
if((!Condition) || (omp_get_num_threads()>1))
return func(0,rows, 0,cols);
- Index size = transpose ? cols : rows;
+ Index size = transpose ? rows : cols;
// 2- compute the maximal number of threads from the size of the product:
// FIXME this has to be fine tuned
@@ -126,26 +126,25 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
std::swap(rows,cols);
Index blockCols = (cols / threads) & ~Index(0x3);
- Index blockRows = (rows / threads) & ~Index(0x7);
+ Index blockRows = (rows / threads);
+ blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
- #pragma omp parallel for schedule(static,1) num_threads(threads)
- for(Index i=0; i<threads; ++i)
+ #pragma omp parallel num_threads(threads)
{
+ Index i = omp_get_thread_num();
Index r0 = i*blockRows;
Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
Index c0 = i*blockCols;
Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
- info[i].rhs_start = c0;
- info[i].rhs_length = actualBlockCols;
+ info[i].lhs_start = r0;
+ info[i].lhs_length = actualBlockRows;
- if(transpose)
- func(0, cols, r0, actualBlockRows, info);
- else
- func(r0, actualBlockRows, 0,cols, info);
+ if(transpose) func(c0, actualBlockCols, 0, rows, info);
+ else func(0, rows, c0, actualBlockCols, info);
}
delete[] info;