diff options
author | 2010-03-05 10:04:17 +0100 | |
---|---|---|
committer | 2010-03-05 10:04:17 +0100 | |
commit | d13b877014928c80a7cf0ae2e563d4e2e60e2c3c (patch) | |
tree | 80230ef1b5945989064e622a0d3460d6ee61104c /Eigen/src/Core | |
parent | 24ef5fedcda79246b337780bc5da63be188e2a75 (diff) |
Remove the 1D and 2D parallelizers; keep only the GEMM-specialized one.
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/products/Parallelizer.h | 72 |
2 files changed, 5 insertions, 73 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index b20a16cd0..cbb389542 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -273,11 +273,9 @@ class GeneralProduct<Lhs, Rhs, GemmProduct> (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>, _ActualLhsType, _ActualRhsType, - Dest> Functor; + Dest> GemmFunctor; -// ei_run_parallel_1d<true>(Functor(lhs, rhs, dst, actualAlpha), this->rows()); -// ei_run_parallel_2d<true>(Functor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols()); - ei_run_parallel_gemm<true>(Functor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols()); + ei_parallelize_gemm<Dest::MaxRowsAtCompileTime>32>(GemmFunctor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols()); } }; diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index 439ce1565..62cf16047 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -25,71 +25,6 @@ #ifndef EIGEN_PARALLELIZER_H #define EIGEN_PARALLELIZER_H -template<bool Parallelize,typename Functor> -void ei_run_parallel_1d(const Functor& func, int size) -{ -#ifndef EIGEN_HAS_OPENMP - func(0,size); -#else - if(!Parallelize) - return func(0,size); - - int threads = omp_get_num_procs(); - int blockSize = size / threads; - #pragma omp parallel for schedule(static,1) - for(int i=0; i<threads; ++i) - { - int blockStart = i*blockSize; - int actualBlockSize = std::min(blockSize, size - blockStart); - - func(blockStart, actualBlockSize); - } -#endif -} - -template<bool Parallelize,typename Functor> -void ei_run_parallel_2d(const Functor& func, int size1, int size2) -{ -#ifndef EIGEN_HAS_OPENMP - func(0,size1, 0,size2); -#else - - int threads = omp_get_max_threads(); - if((!Parallelize)||(threads==1)) - return func(0,size1, 0,size2); - - // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 - static const int 
divide1[17] = { 0, 1, 2, 3, 2, 5, 3, 7, 4, 3, 5, 1, 4, 1, 7, 5, 4}; - static const int divide2[17] = { 0, 1, 1, 1, 2, 1, 2, 1, 2, 3, 2, 11, 3, 13, 2, 3, 4}; - - - - ei_assert(threads<=16 && "too many threads !"); - int blockSize1 = size1 / divide1[threads]; - int blockSize2 = size2 / divide2[threads]; - - Matrix<int,4,Dynamic> ranges(4,threads); - int k = 0; - for(int i1=0; i1<divide1[threads]; ++i1) - { - int blockStart1 = i1*blockSize1; - int actualBlockSize1 = std::min(blockSize1, size1 - blockStart1); - for(int i2=0; i2<divide2[threads]; ++i2) - { - int blockStart2 = i2*blockSize2; - int actualBlockSize2 = std::min(blockSize2, size2 - blockStart2); - ranges.col(k++) << blockStart1, actualBlockSize1, blockStart2, actualBlockSize2; - } - } - - #pragma omp parallel for schedule(static,1) - for(int i=0; i<threads; ++i) - { - func(ranges.col(i)[0],ranges.col(i)[1],ranges.col(i)[2],ranges.col(i)[3]); - } -#endif -} - struct GemmParallelInfo { GemmParallelInfo() : sync(-1), users(0) {} @@ -102,18 +37,17 @@ struct GemmParallelInfo float* blockB; }; -template<bool Parallelize,typename Functor> -void ei_run_parallel_gemm(const Functor& func, int rows, int cols) +template<bool Condition,typename Functor> +void ei_parallelize_gemm(const Functor& func, int rows, int cols) { #ifndef EIGEN_HAS_OPENMP func(0,rows, 0,cols); #else int threads = omp_get_max_threads(); - if((!Parallelize)||(threads==1)) + if((!Condition)||(threads==1)) return func(0,rows, 0,cols); - int blockCols = (cols / threads) & ~0x3; int blockRows = (rows / threads) & ~0x7; |