// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2010 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_PARALLELIZER_H #define EIGEN_PARALLELIZER_H template void ei_run_parallel_1d(const Functor& func, int size) { #ifndef EIGEN_HAS_OPENMP func(0,size); #else if(!Parallelize) return func(0,size); int threads = omp_get_num_procs(); int blockSize = size / threads; #pragma omp parallel for schedule(static,1) for(int i=0; i void ei_run_parallel_2d(const Functor& func, int size1, int size2) { #ifndef EIGEN_HAS_OPENMP func(0,size1, 0,size2); #else int threads = omp_get_max_threads(); if((!Parallelize)||(threads==1)) return func(0,size1, 0,size2); // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 static const int divide1[17] = { 0, 1, 2, 3, 2, 5, 3, 7, 4, 3, 5, 1, 4, 1, 7, 5, 4}; static const int divide2[17] = { 0, 1, 1, 1, 2, 1, 2, 1, 2, 3, 2, 11, 3, 13, 2, 3, 4}; ei_assert(threads<=16 && "too many threads !"); int blockSize1 = size1 / divide1[threads]; int blockSize2 = size2 / divide2[threads]; Matrix ranges(4,threads); int k = 0; for(int i1=0; i1 void ei_run_parallel_gemm(const Functor& func, int rows, int cols) { #ifndef EIGEN_HAS_OPENMP func(0,rows, 0,cols); #else int threads = omp_get_max_threads(); if((!Parallelize)||(threads==1)) return func(0,rows, 0,cols); int blockCols = (cols / threads) & ~0x3; int blockRows = (rows / threads) & ~0x7; float* sharedBlockB = new float[2048*2048*4]; GemmParallelInfo* info = new GemmParallelInfo[threads]; #pragma omp parallel for schedule(static,1) for(int i=0; i