From c7bb1e8ea8dfc984788d0cb77b82a90468393c2e Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Wed, 18 Feb 2015 15:19:23 +0100
Subject: Fix a regression when using OpenMP, and fix bug #714: the number of threads might be lower than the number of requested ones

---
 Eigen/src/Core/products/GeneralMatrixMatrix.h | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

(limited to 'Eigen/src/Core/products/GeneralMatrixMatrix.h')

diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 44e44b986..c38c12c31 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -217,8 +217,9 @@ struct gemm_functor
     : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
   {}
 
-  void initParallelSession() const
+  void initParallelSession(Index num_threads) const
   {
+    m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
     m_blocking.allocateA();
   }
 
@@ -276,7 +277,7 @@ class level3_blocking
 };
 
 template
-class gemm_blocking_space
+class gemm_blocking_space
   : public level3_blocking<
       typename conditional::type,
       typename conditional::type>
@@ -299,7 +300,7 @@ class gemm_blocking_spacem_mc = ActualRows;
       this->m_nc = ActualCols;
@@ -307,6 +308,9 @@ class gemm_blocking_spacem_blockA = m_staticA;
       this->m_blockB = m_staticB;
     }
+
+    void initParallel(Index, Index, Index, Index)
+    {}
 
     inline void allocateA() {}
     inline void allocateB() {}
@@ -331,7 +335,7 @@ class gemm_blocking_spacem_mc = Transpose ? cols : rows;
       this->m_nc = Transpose ? rows : cols;
@@ -351,6 +355,19 @@ class gemm_blocking_spacem_mc * this->m_kc;
       m_sizeB = this->m_kc * this->m_nc;
     }
+
+    void initParallel(Index rows, Index cols, Index depth, Index num_threads)
+    {
+      this->m_mc = Transpose ? cols : rows;
+      this->m_nc = Transpose ? rows : cols;
+      this->m_kc = depth;
+
+      eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
+      Index m = this->m_mc;
+      computeProductBlockingSizes(this->m_kc, m, this->m_nc, num_threads);
+      m_sizeA = this->m_mc * this->m_kc;
+      m_sizeB = this->m_kc * this->m_nc;
+    }
 
     void allocateA()
     {
-- 
cgit v1.2.3