aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2010-07-03 12:52:39 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2010-07-03 12:52:39 +0200
commitbe1fdbf3af7a5fafdab440f70646ee39a3dd9bd3 (patch)
treef29e510f0cdb58640eafa5ea00d4448080dd7b7a /Eigen/src
parent0d9dc578dd62bbd388fb111763eec257ad9db1be (diff)
fix openmp for row major destination
Diffstat (limited to 'Eigen/src')
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrix.h6
-rw-r--r--Eigen/src/Core/products/Parallelizer.h14
2 files changed, 14 insertions, 6 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 295dd8f25..39b283a3f 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -373,8 +373,8 @@ class ei_gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols
void allocateW()
{
- if(this->m_blockB==0)
- this->m_blockB = ei_aligned_new<RhsScalar>(m_sizeB);
+ if(this->m_blockW==0)
+ this->m_blockW = ei_aligned_new<RhsScalar>(m_sizeW);
}
void allocateAll()
@@ -432,7 +432,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols());
- ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols());
+ ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
}
};
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index e27c8edab..f307812bf 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -81,7 +81,7 @@ template<typename Index> struct GemmParallelInfo
};
template<bool Condition, typename Functor, typename Index>
-void ei_parallelize_gemm(const Functor& func, Index rows, Index cols)
+void ei_parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
{
#ifndef EIGEN_HAS_OPENMP
func(0,rows, 0,cols);
@@ -98,9 +98,11 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols)
if((!Condition) || (omp_get_num_threads()>1))
return func(0,rows, 0,cols);
+ Index size = transpose ? cols : rows;
+
// 2- compute the maximal number of threads from the size of the product:
// FIXME this has to be fine tuned
- Index max_threads = std::max<Index>(1,rows / 32);
+ Index max_threads = std::max<Index>(1,size / 32);
// 3 - compute the number of threads we are going to use
Index threads = std::min<Index>(nbThreads(), max_threads);
@@ -110,6 +112,9 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols)
func.initParallelSession();
+ if(transpose)
+ std::swap(rows,cols);
+
Index blockCols = (cols / threads) & ~Index(0x3);
Index blockRows = (rows / threads) & ~Index(0x7);
@@ -127,7 +132,10 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols)
info[i].rhs_start = c0;
info[i].rhs_length = actualBlockCols;
- func(r0, actualBlockRows, 0,cols, info);
+ if(transpose)
+ func(0, cols, r0, actualBlockRows, info);
+ else
+ func(r0, actualBlockRows, 0,cols, info);
}
delete[] info;