about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2010-03-05 10:04:17 +0100
committer: Gael Guennebaud <g.gael@free.fr> 2010-03-05 10:04:17 +0100
commit: d13b877014928c80a7cf0ae2e563d4e2e60e2c3c (patch)
tree: 80230ef1b5945989064e622a0d3460d6ee61104c /Eigen/src/Core
parent: 24ef5fedcda79246b337780bc5da63be188e2a75 (diff)
remove the 1D and 2D parallelizer, keep only the GEMM specialized one
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- Eigen/src/Core/products/GeneralMatrixMatrix.h | 6
-rw-r--r-- Eigen/src/Core/products/Parallelizer.h | 72
2 files changed, 5 insertions, 73 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index b20a16cd0..cbb389542 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -273,11 +273,9 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
_ActualLhsType,
_ActualRhsType,
- Dest> Functor;
+ Dest> GemmFunctor;
-// ei_run_parallel_1d<true>(Functor(lhs, rhs, dst, actualAlpha), this->rows());
-// ei_run_parallel_2d<true>(Functor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols());
- ei_run_parallel_gemm<true>(Functor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols());
+ ei_parallelize_gemm<Dest::MaxRowsAtCompileTime>32>(GemmFunctor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols());
}
};
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index 439ce1565..62cf16047 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -25,71 +25,6 @@
#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
-template<bool Parallelize,typename Functor>
-void ei_run_parallel_1d(const Functor& func, int size)
-{
-#ifndef EIGEN_HAS_OPENMP
- func(0,size);
-#else
- if(!Parallelize)
- return func(0,size);
-
- int threads = omp_get_num_procs();
- int blockSize = size / threads;
- #pragma omp parallel for schedule(static,1)
- for(int i=0; i<threads; ++i)
- {
- int blockStart = i*blockSize;
- int actualBlockSize = std::min(blockSize, size - blockStart);
-
- func(blockStart, actualBlockSize);
- }
-#endif
-}
-
-template<bool Parallelize,typename Functor>
-void ei_run_parallel_2d(const Functor& func, int size1, int size2)
-{
-#ifndef EIGEN_HAS_OPENMP
- func(0,size1, 0,size2);
-#else
-
- int threads = omp_get_max_threads();
- if((!Parallelize)||(threads==1))
- return func(0,size1, 0,size2);
-
- // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
- static const int divide1[17] = { 0, 1, 2, 3, 2, 5, 3, 7, 4, 3, 5, 1, 4, 1, 7, 5, 4};
- static const int divide2[17] = { 0, 1, 1, 1, 2, 1, 2, 1, 2, 3, 2, 11, 3, 13, 2, 3, 4};
-
-
-
- ei_assert(threads<=16 && "too many threads !");
- int blockSize1 = size1 / divide1[threads];
- int blockSize2 = size2 / divide2[threads];
-
- Matrix<int,4,Dynamic> ranges(4,threads);
- int k = 0;
- for(int i1=0; i1<divide1[threads]; ++i1)
- {
- int blockStart1 = i1*blockSize1;
- int actualBlockSize1 = std::min(blockSize1, size1 - blockStart1);
- for(int i2=0; i2<divide2[threads]; ++i2)
- {
- int blockStart2 = i2*blockSize2;
- int actualBlockSize2 = std::min(blockSize2, size2 - blockStart2);
- ranges.col(k++) << blockStart1, actualBlockSize1, blockStart2, actualBlockSize2;
- }
- }
-
- #pragma omp parallel for schedule(static,1)
- for(int i=0; i<threads; ++i)
- {
- func(ranges.col(i)[0],ranges.col(i)[1],ranges.col(i)[2],ranges.col(i)[3]);
- }
-#endif
-}
-
struct GemmParallelInfo
{
GemmParallelInfo() : sync(-1), users(0) {}
@@ -102,18 +37,17 @@ struct GemmParallelInfo
float* blockB;
};
-template<bool Parallelize,typename Functor>
-void ei_run_parallel_gemm(const Functor& func, int rows, int cols)
+template<bool Condition,typename Functor>
+void ei_parallelize_gemm(const Functor& func, int rows, int cols)
{
#ifndef EIGEN_HAS_OPENMP
func(0,rows, 0,cols);
#else
int threads = omp_get_max_threads();
- if((!Parallelize)||(threads==1))
+ if((!Condition)||(threads==1))
return func(0,rows, 0,cols);
-
int blockCols = (cols / threads) & ~0x3;
int blockRows = (rows / threads) & ~0x7;