From a1e110332829a4bb38ca8e55608a2b048876018e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 23 Feb 2010 21:40:15 +0100 Subject: add a 2D parallelizer --- Eigen/src/Core/products/GeneralMatrixMatrix.h | 16 +++++++---- Eigen/src/Core/products/Parallelizer.h | 40 +++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index d4f1f1913..84429a0d9 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -109,6 +109,8 @@ static void run(int rows, int cols, int depth, // Everything is packed, we can now call the block * panel kernel: ei_gebp_kernel >() (res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); + +// sgemm_kernel(actual_mc, cols, actual_kc, alpha, blockA, allocatedBlockB, res+i2, resStride); } } @@ -137,12 +139,14 @@ struct ei_gemm_functor : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha) {} - void operator() (int start, int size) const + void operator() (int col, int cols, int row=0, int rows=-1) const { - Gemm::run(m_lhs.rows(), size, m_lhs.cols(), - (const Scalar*)&(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(), - (const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,start)), m_rhs.stride(), - (Scalar*)&(m_dest.coeffRef(0,start)), m_dest.stride(), + if(rows==-1) + rows = m_lhs.rows(); + Gemm::run(rows, cols, m_lhs.cols(), + (const Scalar*)&(m_lhs.const_cast_derived().coeffRef(row,0)), m_lhs.stride(), + (const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,col)), m_rhs.stride(), + (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.stride(), m_actualAlpha); } @@ -187,7 +191,7 @@ class GeneralProduct _ActualRhsType, Dest> Functor; - ei_run_parallel_1d(Functor(lhs, rhs, dst, actualAlpha), this->cols()); + ei_run_parallel_2d(Functor(lhs, rhs, dst, actualAlpha), this->cols(), this->rows()); } }; diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index d555508b2..088e387f9 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -47,4 +47,44 @@ void ei_run_parallel_1d(const Functor& func, int size) #endif } +template +void ei_run_parallel_2d(const Functor& func, int size1, int size2) +{ +#ifndef EIGEN_HAS_OPENMP + func(0,size1, 0,size2); +#else + if(!Parallelize) + return func(0,size1, 0,size2); + + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + static const int divide1[17] = { 0, 1, 2, 3, 2, 5, 3, 7, 4, 3, 5, 11, 4, 13, 7, 5, 4}; + static const int divide2[17] = { 0, 1, 1, 1, 2, 1, 2, 1, 2, 3, 2, 1, 3, 1, 2, 3, 4}; + + int threads = omp_get_num_procs(); + ei_assert(threads<=16 && "too many threads !"); + int blockSize1 = size1 / divide1[threads]; + int blockSize2 = size2 / divide2[threads]; + + Matrix ranges(4,threads); + int k = 0; + for(int i1=0; i1