diff options
author | 2010-02-23 21:40:15 +0100 | |
---|---|---|
committer | 2010-02-23 21:40:15 +0100 | |
commit | a1e110332829a4bb38ca8e55608a2b048876018e (patch) | |
tree | 8725d7acea48e8fd8f5951f771727c7813a6f1e5 | |
parent | 022e2f5ef4513750a5b2960ba0e8e825d8f640bb (diff) |
add a 2D parallelizer
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix.h | 16 | ||||
-rw-r--r-- | Eigen/src/Core/products/Parallelizer.h | 40 |
2 files changed, 50 insertions, 6 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index d4f1f1913..84429a0d9 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -109,6 +109,8 @@ static void run(int rows, int cols, int depth, // Everything is packed, we can now call the block * panel kernel: ei_gebp_kernel<Scalar, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> >() (res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); + +// sgemm_kernel(actual_mc, cols, actual_kc, alpha, blockA, allocatedBlockB, res+i2, resStride); } } @@ -137,12 +139,14 @@ struct ei_gemm_functor : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha) {} - void operator() (int start, int size) const + void operator() (int col, int cols, int row=0, int rows=-1) const { - Gemm::run(m_lhs.rows(), size, m_lhs.cols(), - (const Scalar*)&(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(), - (const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,start)), m_rhs.stride(), - (Scalar*)&(m_dest.coeffRef(0,start)), m_dest.stride(), + if(rows==-1) + rows = m_lhs.rows(); + Gemm::run(rows, cols, m_lhs.cols(), + (const Scalar*)&(m_lhs.const_cast_derived().coeffRef(row,0)), m_lhs.stride(), + (const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,col)), m_rhs.stride(), + (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.stride(), m_actualAlpha); } @@ -187,7 +191,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct> _ActualRhsType, Dest> Functor; - ei_run_parallel_1d<true>(Functor(lhs, rhs, dst, actualAlpha), this->cols()); + ei_run_parallel_2d<true>(Functor(lhs, rhs, dst, actualAlpha), this->cols(), this->rows()); } }; diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index d555508b2..088e387f9 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -47,4 +47,44 @@ void ei_run_parallel_1d(const Functor& 
func, int size)
 #endif
 }
 
+template<bool Parallelize,typename Functor>
+void ei_run_parallel_2d(const Functor& func, int size1, int size2)
+{
+#ifndef EIGEN_HAS_OPENMP
+  func(0,size1, 0,size2);
+#else
+  if(!Parallelize)
+    return func(0,size1, 0,size2);
+
+  // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+  static const int divide1[17] = { 0, 1, 2, 3, 2, 5, 3, 7, 4, 3,  5, 11,  4, 13,  7,  5,  4};
+  static const int divide2[17] = { 0, 1, 1, 1, 2, 1, 2, 1, 2, 3,  2,  1,  3,  1,  2,  3,  4};
+
+  int threads = omp_get_num_procs();
+  ei_assert(threads<=16 && "too many threads !");
+  int blockSize1 = size1 / divide1[threads];
+  int blockSize2 = size2 / divide2[threads];
+
+  Matrix<int,4,Dynamic> ranges(4,threads);
+  int k = 0;
+  for(int i1=0; i1<divide1[threads]; ++i1)
+  {
+    int blockStart1 = i1*blockSize1;
+    int actualBlockSize1 = (i1+1==divide1[threads]) ? size1-blockStart1 : blockSize1; // last block takes the division remainder so all of size1 is covered
+    for(int i2=0; i2<divide2[threads]; ++i2)
+    {
+      int blockStart2 = i2*blockSize2;
+      int actualBlockSize2 = (i2+1==divide2[threads]) ? size2-blockStart2 : blockSize2; // ditto for size2; min(blockSize,remaining) dropped the tail
+      ranges.col(k++) << blockStart1, actualBlockSize1, blockStart2, actualBlockSize2;
+    }
+  }
+
+  #pragma omp parallel for schedule(static,1)
+  for(int i=0; i<threads; ++i)
+  {
+    func(ranges.col(i)[0],ranges.col(i)[1],ranges.col(i)[2],ranges.col(i)[3]);
+  }
+#endif
+}
+
 #endif // EIGEN_GENERAL_MATRIX_MATRIX_H