diff options
Diffstat (limited to 'bench')
-rw-r--r-- | bench/BenchSparseUtil.h | 2 | ||||
-rw-r--r-- | bench/BenchUtil.h | 40 | ||||
-rw-r--r-- | bench/basicbenchmark.cpp | 14 | ||||
-rw-r--r-- | bench/benchCholesky.cpp | 132 | ||||
-rw-r--r-- | bench/benchEigenSolver.cpp | 210 | ||||
-rwxr-xr-x | bench/bench_unrolling | 2 | ||||
-rw-r--r-- | bench/ompbench.cxxlist | 7 | ||||
-rw-r--r-- | bench/ompbenchmark.cpp | 81 |
8 files changed, 384 insertions, 104 deletions
diff --git a/bench/BenchSparseUtil.h b/bench/BenchSparseUtil.h index 9d88148d0..2c24c29e6 100644 --- a/bench/BenchSparseUtil.h +++ b/bench/BenchSparseUtil.h @@ -3,8 +3,6 @@ #include <Eigen/Sparse> #include <bench/BenchTimer.h> - - using namespace std; using namespace Eigen; USING_PART_OF_NAMESPACE_EIGEN diff --git a/bench/BenchUtil.h b/bench/BenchUtil.h index bb3c4611c..4afe61980 100644 --- a/bench/BenchUtil.h +++ b/bench/BenchUtil.h @@ -26,3 +26,43 @@ template<typename MatrixType> void initMatrix_identity(MatrixType& mat) { mat.setIdentity(); } + +#ifndef __INTEL_COMPILER +#define DISABLE_SSE_EXCEPTIONS() { \ + int aux; \ + asm( \ + "stmxcsr %[aux] \n\t" \ + "orl $32832, %[aux] \n\t" \ + "ldmxcsr %[aux] \n\t" \ + : : [aux] "m" (aux)); \ +} +#else +#define DISABLE_SSE_EXCEPTIONS() +#endif + +#ifdef BENCH_GMM +#include <gmm/gmm.h> +template <typename EigenMatrixType, typename GmmMatrixType> +void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst) +{ + dst.resize(src.rows(),src.cols()); + for (int j=0; j<src.cols(); ++j) + for (int i=0; i<src.rows(); ++i) + dst(i,j) = src.coeff(i,j); +} +#endif + + +#ifdef BENCH_GSL +#include <gsl/gsl_matrix.h> +#include <gsl/gsl_linalg.h> +#include <gsl/gsl_eigen.h> +template <typename EigenMatrixType> +void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst) +{ + for (int j=0; j<src.cols(); ++j) + for (int i=0; i<src.rows(); ++i) + gsl_matrix_set(*dst, i, j, src.coeff(i,j)); +} +#endif + diff --git a/bench/basicbenchmark.cpp b/bench/basicbenchmark.cpp index 25101270e..bd500c06a 100644 --- a/bench/basicbenchmark.cpp +++ b/bench/basicbenchmark.cpp @@ -4,19 +4,7 @@ int main(int argc, char *argv[]) { - // disable floating point exceptions - // this leads to more stable bench results - // (this is done by default by ICC) - #ifndef __INTEL_COMPILER - { - int aux; - asm( - "stmxcsr %[aux] \n\t" - "orl $32832, %[aux] \n\t" - "ldmxcsr %[aux] \n\t" - : : [aux] "m" (aux)); - } - #endif + DISABLE_SSE_EXCEPTIONS(); // this is the list of matrix type and size we want to bench: // ((suffix) (matrix size) (number of iterations)) diff --git a/bench/benchCholesky.cpp b/bench/benchCholesky.cpp new file mode 100644 index 000000000..88b52eb4e --- /dev/null +++ b/bench/benchCholesky.cpp @@ -0,0 +1,132 @@ + +// g++ -DNDEBUG -O3 -I.. benchCholesky.cpp -o benchCholesky && ./benchCholesky +// options: +// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3 +// -DEIGEN_DONT_VECTORIZE +// -msse2 +// -DREPEAT=100 +// -DTRIES=10 +// -DSCALAR=double + +#include <Eigen/Array> +#include <Eigen/Cholesky> +#include <bench/BenchUtil.h> +using namespace Eigen; + +#ifndef REPEAT +#define REPEAT 10000 +#endif + +#ifndef TRIES +#define TRIES 4 +#endif + +typedef float Scalar; + +template <typename MatrixType> +__attribute__ ((noinline)) void benchCholesky(const MatrixType& m) +{ + int rows = m.rows(); + int cols = m.cols(); + + int repeats = (REPEAT*1000)/(rows*rows); + + typedef typename MatrixType::Scalar Scalar; + typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType; + + MatrixType a = MatrixType::random(rows,cols); + SquareMatrixType covMat = a * a.adjoint(); + + BenchTimer timerNoSqrt, timerSqrt; + + Scalar acc = 0; + int r = ei_random<int>(0,covMat.rows()-1); + int c = ei_random<int>(0,covMat.cols()-1); + for (int t=0; t<TRIES; ++t) + { + timerNoSqrt.start(); + for (int k=0; k<repeats; ++k) + { + CholeskyWithoutSquareRoot<SquareMatrixType> cholnosqrt(covMat); + acc += cholnosqrt.matrixL().coeff(r,c); + } + timerNoSqrt.stop(); + } + + for (int t=0; t<TRIES; ++t) + { + timerSqrt.start(); + for (int k=0; k<repeats; ++k) + { + Cholesky<SquareMatrixType> chol(covMat); + acc += chol.matrixL().coeff(r,c); + } + timerSqrt.stop(); + } + + if (MatrixType::RowsAtCompileTime==Dynamic) + std::cout << "dyn "; + else + std::cout << "fixed "; + std::cout << covMat.rows() << " \t" + << (timerNoSqrt.value() * REPEAT) / repeats << "s \t" + << (timerSqrt.value() * REPEAT) / repeats << "s"; + + + #ifdef BENCH_GSL + if (MatrixType::RowsAtCompileTime==Dynamic) + { + timerSqrt.reset(); + + gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols()); + gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols()); + + eiToGsl(covMat, &gslCovMat); + for (int t=0; t<TRIES; ++t) + { + timerSqrt.start(); + for (int k=0; k<repeats; ++k) + { + gsl_matrix_memcpy(gslCopy,gslCovMat); + gsl_linalg_cholesky_decomp(gslCopy); + acc += gsl_matrix_get(gslCopy,r,c); + } + timerSqrt.stop(); + } + + std::cout << " | \t" + << timerSqrt.value() * REPEAT / repeats << "s"; + + gsl_matrix_free(gslCovMat); + } + #endif + std::cout << "\n"; + // make sure the compiler does not optimize too much + if (acc==123) + std::cout << acc; +} + +int main(int argc, char* argv[]) +{ + const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0}; + std::cout << "size no sqrt standard"; + #ifdef BENCH_GSL + std::cout << " GSL (standard + double + ATLAS) "; + #endif + std::cout << "\n"; + + for (uint i=0; dynsizes[i]>0; ++i) + benchCholesky(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i])); + + benchCholesky(Matrix<Scalar,2,2>()); + benchCholesky(Matrix<Scalar,3,3>()); + benchCholesky(Matrix<Scalar,4,4>()); + benchCholesky(Matrix<Scalar,5,5>()); + benchCholesky(Matrix<Scalar,6,6>()); + benchCholesky(Matrix<Scalar,7,7>()); + benchCholesky(Matrix<Scalar,8,8>()); + benchCholesky(Matrix<Scalar,12,12>()); + benchCholesky(Matrix<Scalar,16,16>()); + return 0; +} + diff --git a/bench/benchEigenSolver.cpp b/bench/benchEigenSolver.cpp new file mode 100644 index 000000000..a62ee41f4 --- /dev/null +++ b/bench/benchEigenSolver.cpp @@ -0,0 +1,210 @@ + +// g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp -o benchEigenSolver && ./benchEigenSolver +// options: +// -DBENCH_GMM +// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3 +// -DEIGEN_DONT_VECTORIZE +// -msse2 +// -DREPEAT=100 +// -DTRIES=10 +// -DSCALAR=double + +#include <Eigen/Array> +#include <Eigen/QR> +#include <bench/BenchUtil.h> +using namespace Eigen; + +#ifndef REPEAT +#define REPEAT 1000 +#endif + +#ifndef TRIES +#define TRIES 4 +#endif + +#ifndef SCALAR +#define SCALAR float +#endif + +typedef SCALAR Scalar; + +template <typename MatrixType> +__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m) +{ + int rows = m.rows(); + int cols = m.cols(); + + int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(rows)))); + int saRepeats = stdRepeats * 4; + + typedef typename MatrixType::Scalar Scalar; + typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType; + + MatrixType a = MatrixType::random(rows,cols); + SquareMatrixType covMat = a * a.adjoint(); + + BenchTimer timerSa, timerStd; + + Scalar acc = 0; + int r = ei_random<int>(0,covMat.rows()-1); + int c = ei_random<int>(0,covMat.cols()-1); + { + SelfAdjointEigenSolver<SquareMatrixType> ei(covMat); + for (int t=0; t<TRIES; ++t) + { + timerSa.start(); + for (int k=0; k<saRepeats; ++k) + { + ei.compute(covMat); + acc += ei.eigenvectors().coeff(r,c); + } + timerSa.stop(); + } + } + + { + EigenSolver<SquareMatrixType> ei(covMat); + for (int t=0; t<TRIES; ++t) + { + timerStd.start(); + for (int k=0; k<stdRepeats; ++k) + { + ei.compute(covMat); + acc += ei.eigenvectors().coeff(r,c); + } + timerStd.stop(); + } + } + + if (MatrixType::RowsAtCompileTime==Dynamic) + std::cout << "dyn "; + else + std::cout << "fixed "; + std::cout << covMat.rows() << " \t" + << timerSa.value() * REPEAT / saRepeats << "s \t" + << timerStd.value() * REPEAT / stdRepeats << "s"; + + #ifdef BENCH_GMM + if (MatrixType::RowsAtCompileTime==Dynamic) + { + timerSa.reset(); + timerStd.reset(); + + gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(),covMat.cols()); + gmm::dense_matrix<Scalar> eigvect(covMat.rows(),covMat.cols()); + std::vector<Scalar> eigval(covMat.rows()); + eiToGmm(covMat, gmmCovMat); + for (int t=0; t<TRIES; ++t) + { + timerSa.start(); + for (int k=0; k<saRepeats; ++k) + { + gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect); + acc += eigvect(r,c); + } + timerSa.stop(); + } + // the non-selfadjoint solver does not compute the eigen vectors +// for (int t=0; t<TRIES; ++t) +// { +// timerStd.start(); +// for (int k=0; k<stdRepeats; ++k) +// { +// gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect); +// acc += eigvect(r,c); +// } +// timerStd.stop(); +// } + + std::cout << " | \t" + << timerSa.value() * REPEAT / saRepeats << "s" + << /*timerStd.value() * REPEAT / stdRepeats << "s"*/ " na "; + } + #endif + + #ifdef BENCH_GSL + if (MatrixType::RowsAtCompileTime==Dynamic) + { + timerSa.reset(); + timerStd.reset(); + + gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols()); + gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols()); + gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(),covMat.cols()); + gsl_vector* eigval = gsl_vector_alloc(covMat.rows()); + gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows()); + + gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(),covMat.cols()); + gsl_vector_complex* eigvalz = gsl_vector_complex_alloc(covMat.rows()); + gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows()); + + eiToGsl(covMat, &gslCovMat); + for (int t=0; t<TRIES; ++t) + { + timerSa.start(); + for (int k=0; k<saRepeats; ++k) + { + gsl_matrix_memcpy(gslCopy,gslCovMat); + gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm); + acc += gsl_matrix_get(eigvect,r,c); + } + timerSa.stop(); + } + for (int t=0; t<TRIES; ++t) + { + timerStd.start(); + for (int k=0; k<stdRepeats; ++k) + { + gsl_matrix_memcpy(gslCopy,gslCovMat); + gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm); + acc += GSL_REAL(gsl_matrix_complex_get(eigvectz,r,c)); + } + timerStd.stop(); + } + + std::cout << " | \t" + << timerSa.value() * REPEAT / saRepeats << "s \t" + << timerStd.value() * REPEAT / stdRepeats << "s"; + + gsl_matrix_free(gslCovMat); + gsl_vector_free(gslCopy); + gsl_matrix_free(eigvect); + gsl_vector_free(eigval); + gsl_matrix_complex_free(eigvectz); + gsl_vector_complex_free(eigvalz); + gsl_eigen_symmv_free(eisymm); + gsl_eigen_nonsymmv_free(einonsymm); + } + #endif + + std::cout << "\n"; + + // make sure the compiler does not optimize too much + if (acc==123) + std::cout << acc; +} + +int main(int argc, char* argv[]) +{ + const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0}; + std::cout << "size selfadjoint generic"; + #ifdef BENCH_GMM + std::cout << " GMM++ "; + #endif + #ifdef BENCH_GSL + std::cout << " GSL (double + ATLAS) "; + #endif + std::cout << "\n"; + for (uint i=0; dynsizes[i]>0; ++i) + benchEigenSolver(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i])); + + benchEigenSolver(Matrix<Scalar,2,2>()); + benchEigenSolver(Matrix<Scalar,3,3>()); + benchEigenSolver(Matrix<Scalar,4,4>()); + benchEigenSolver(Matrix<Scalar,6,6>()); + benchEigenSolver(Matrix<Scalar,8,8>()); + benchEigenSolver(Matrix<Scalar,12,12>()); + benchEigenSolver(Matrix<Scalar,16,16>()); + return 0; +} + diff --git a/bench/bench_unrolling b/bench/bench_unrolling index 7934031a8..bf01cce7d 100755 --- a/bench/bench_unrolling +++ b/bench/bench_unrolling @@ -5,7 +5,7 @@ for ((i=1; i<16; ++i)); do echo "Matrix size: $i x $i :" - $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=25 -o benchmark && time ./benchmark >/dev/null + $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null echo " " done diff --git a/bench/ompbench.cxxlist b/bench/ompbench.cxxlist deleted file mode 100644 index fc6681d33..000000000 --- a/bench/ompbench.cxxlist +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp" - -# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=10000 -fopenmp" - -CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp"
\ No newline at end of file diff --git a/bench/ompbenchmark.cpp b/bench/ompbenchmark.cpp deleted file mode 100644 index ac5155cb8..000000000 --- a/bench/ompbenchmark.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// g++ -O3 -DNDEBUG -I.. -fopenmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null -// icpc -fast -fno-exceptions -DNDEBUG -I.. -openmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null - -#include <omp.h> -#include "BenchUtil.h" -#include "basicbenchmark.h" - -// #include <Eigen/Core> -// #include "BenchTimer.h" -// -// using namespace std; -// USING_PART_OF_NAMESPACE_EIGEN -// -// enum {LazyEval, EarlyEval, OmpEval}; -// -// template<int Mode, typename MatrixType> -// double benchSingleProc(const MatrixType& mat, int iterations, int tries) __attribute__((noinline)); -// -// template<int Mode, typename MatrixType> -// double benchBasic(const MatrixType& mat, int iterations, int tries) -// { -// const int rows = mat.rows(); -// const int cols = mat.cols(); -// -// Eigen::BenchTimer timer; -// for(uint t=0; t<tries; ++t) -// { -// MatrixType I = MatrixType::identity(rows, cols); -// MatrixType m = MatrixType::random(rows, cols); -// -// timer.start(); -// for(int a = 0; a < iterations; a++) -// { -// if(Mode==LazyEval) -// m = (I + 0.00005 * (m + m.lazyProduct(m))).eval(); -// else if(Mode==OmpEval) -// m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP(); -// else -// m = I + 0.00005 * (m + m * m); -// } -// timer.stop(); -// cerr << m; -// } -// return timer.value(); -// }; - -int main(int argc, char *argv[]) -{ - // disbale floating point exceptions - // this leads to more stable bench results - { - int aux; - asm( - "stmxcsr %[aux] \n\t" - "orl $32832, %[aux] \n\t" - "ldmxcsr %[aux] \n\t" - : : [aux] "m" (aux)); - } - - // commented since the default setting is use as many threads as processors - //omp_set_num_threads(omp_get_num_procs()); - - std::cout << "double, fixed-size 4x4: " - << benchBasic<LazyEval>(Matrix4d(), 10000, 10) << "s " - << benchBasic<OmpEval>(Matrix4d(), 10000, 10) << "s \n"; - - #define BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES) {\ - double single = benchBasic<LazyEval>(Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \ - double omp = benchBasic<OmpEval> (Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \ - std::cout << #TYPE << ", " << #SIZE << "x" << #SIZE << ": " << single << "s " << omp << "s " \ - << " => x" << single/omp << " (" << omp_get_num_procs() << ")" << std::endl; \ - } - - BENCH_MATRIX(double, 32, 1000, 10); - BENCH_MATRIX(double, 128, 10, 10); - BENCH_MATRIX(double, 512, 1, 6); - BENCH_MATRIX(double, 1024, 1, 4); - - return 0; -} - |