add a 2D parallelizer

author: Gael Guennebaud <g.gael@free.fr> 2010-02-23 21:40:15 +0100
committer: Gael Guennebaud <g.gael@free.fr> 2010-02-23 21:40:15 +0100
commit: a1e110332829a4bb38ca8e55608a2b048876018e (patch)
tree: 8725d7acea48e8fd8f5951f771727c7813a6f1e5
parent: 022e2f5ef4513750a5b2960ba0e8e825d8f640bb (diff)
2 files changed, 50 insertions, 6 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index d4f1f1913..84429a0d9 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -109,6 +109,8 @@ static void run(int rows, int cols, int depth,
       // Everything is packed, we can now call the block * panel kernel:
       ei_gebp_kernel<Scalar, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> >()
         (res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
+
+//         sgemm_kernel(actual_mc, cols, actual_kc, alpha, blockA, allocatedBlockB, res+i2, resStride);
     }
   }
 
@@ -137,12 +139,14 @@ struct ei_gemm_functor
     : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha)
   {}
 
-  void operator() (int start, int size) const
+  void operator() (int col, int cols, int row=0, int rows=-1) const
   {
-    Gemm::run(m_lhs.rows(), size, m_lhs.cols(),
-              (const Scalar*)&(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(),
-              (const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,start)), m_rhs.stride(),
-              (Scalar*)&(m_dest.coeffRef(0,start)), m_dest.stride(),
+    if(rows==-1)
+      rows = m_lhs.rows();
+    Gemm::run(rows, cols, m_lhs.cols(),
+              (const Scalar*)&(m_lhs.const_cast_derived().coeffRef(row,0)), m_lhs.stride(),
+              (const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,col)), m_rhs.stride(),
+              (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.stride(),
               m_actualAlpha);
   }
 
@@ -187,7 +191,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
         _ActualRhsType,
         Dest> Functor;
 
-      ei_run_parallel_1d<true>(Functor(lhs, rhs, dst, actualAlpha), this->cols());
+      ei_run_parallel_2d<true>(Functor(lhs, rhs, dst, actualAlpha), this->cols(), this->rows());
     }
 };
 
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index d555508b2..088e387f9 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -47,4 +47,44 @@ void ei_run_parallel_1d(const Functor& func, int size)
 #endif
 }
 
+template<bool Parallelize,typename Functor>
+void ei_run_parallel_2d(const Functor& func, int size1, int size2)
+{
+#ifndef EIGEN_HAS_OPENMP
+  func(0,size1, 0,size2);
+#else
+  if(!Parallelize)
+    return func(0,size1, 0,size2);
+
+                                // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+  static const int divide1[17] = { 0, 1, 2, 3, 2, 5, 3, 7, 4, 3,  5, 11,  4, 13,  7,  5, 4};
+  static const int divide2[17] = { 0, 1, 1, 1, 2, 1, 2, 1, 2, 3,  2,  1,  3,  1,  2,  3, 4};
+
+  int threads = omp_get_num_procs();
+  ei_assert(threads<=16 && "too many threads !");
+  int blockSize1 = size1 / divide1[threads];
+  int blockSize2 = size2 / divide2[threads];
+  
+  Matrix<int,4,Dynamic> ranges(4,threads);
+  int k = 0;
+  for(int i1=0; i1<divide1[threads]; ++i1)
+  {
+    int blockStart1 = i1*blockSize1;
+    int actualBlockSize1 = std::min(blockSize1, size1 - blockStart1);
+    for(int i2=0; i2<divide2[threads]; ++i2)
+    {
+      int blockStart2 = i2*blockSize2;
+      int actualBlockSize2 = std::min(blockSize2, size2 - blockStart2);
+      ranges.col(k++) << blockStart1, actualBlockSize1, blockStart2, actualBlockSize2;
+    }
+  }
+  
+  #pragma omp parallel for schedule(static,1)
+  for(int i=0; i<threads; ++i)
+  {
+    func(ranges.col(i)[0],ranges.col(i)[1],ranges.col(i)[2],ranges.col(i)[3]);
+  }
+#endif
+}
+
 #endif // EIGEN_GENERAL_MATRIX_MATRIX_H
author	Gael Guennebaud <g.gael@free.fr>	2010-02-23 21:40:15 +0100
committer	Gael Guennebaud <g.gael@free.fr>	2010-02-23 21:40:15 +0100
commit	a1e110332829a4bb38ca8e55608a2b048876018e (patch)
tree	8725d7acea48e8fd8f5951f771727c7813a6f1e5
parent	022e2f5ef4513750a5b2960ba0e8e825d8f640bb (diff)