aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products/GeneralMatrixMatrix.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2015-02-18 15:19:23 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2015-02-18 15:19:23 +0100
commitc7bb1e8ea8dfc984788d0cb77b82a90468393c2e (patch)
tree7c215b4e3975cfb9d23df098bf76c0ed9bcb3b89 /Eigen/src/Core/products/GeneralMatrixMatrix.h
parent548b7813805d9e314f97eb6f731d711df663a46b (diff)
Fix a regression when using OpenMP, and fix bug #714: the number of threads might be lower than the number of requested ones
Diffstat (limited to 'Eigen/src/Core/products/GeneralMatrixMatrix.h')
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrix.h25
1 files changed, 21 insertions, 4 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 44e44b986..c38c12c31 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -217,8 +217,9 @@ struct gemm_functor
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
- void initParallelSession() const
+ void initParallelSession(Index num_threads) const
{
+ m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
m_blocking.allocateA();
}
@@ -276,7 +277,7 @@ class level3_blocking
};
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
-class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>
: public level3_blocking<
typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -299,7 +300,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
public:
- gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, int /*num_threads*/, bool /*full_rows = false*/)
+ gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
{
this->m_mc = ActualRows;
this->m_nc = ActualCols;
@@ -307,6 +308,9 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
this->m_blockA = m_staticA;
this->m_blockB = m_staticB;
}
+
+ void initParallel(Index, Index, Index, Index)
+ {}
inline void allocateA() {}
inline void allocateB() {}
@@ -331,7 +335,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
public:
- gemm_blocking_space(Index rows, Index cols, Index depth, int num_threads, bool l3_blocking)
+ gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
{
this->m_mc = Transpose ? cols : rows;
this->m_nc = Transpose ? rows : cols;
@@ -351,6 +355,19 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
m_sizeA = this->m_mc * this->m_kc;
m_sizeB = this->m_kc * this->m_nc;
}
+
+ void initParallel(Index rows, Index cols, Index depth, Index num_threads)
+ {
+ this->m_mc = Transpose ? cols : rows;
+ this->m_nc = Transpose ? rows : cols;
+ this->m_kc = depth;
+
+ eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
+ Index m = this->m_mc;
+ computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
+ m_sizeA = this->m_mc * this->m_kc;
+ m_sizeB = this->m_kc * this->m_nc;
+ }
void allocateA()
{