aboutsummaryrefslogtreecommitdiffhomepage
path: root/bench
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2008-07-08 17:20:17 +0000
committerGravatar Gael Guennebaud <g.gael@free.fr>2008-07-08 17:20:17 +0000
commit77a622f2bb3356ee005a9413f6436373ec06efc2 (patch)
tree14b044c01f19c01102cc275e93e3e6abb6923a39 /bench
parent6f09d3a67d333d68e7c971147ec77600e86e93f3 (diff)
add Cholesky and eigensolver benchmark
Diffstat (limited to 'bench')
-rw-r--r--bench/BenchSparseUtil.h2
-rw-r--r--bench/BenchUtil.h40
-rw-r--r--bench/basicbenchmark.cpp14
-rw-r--r--bench/benchCholesky.cpp132
-rw-r--r--bench/benchEigenSolver.cpp210
-rwxr-xr-xbench/bench_unrolling2
-rw-r--r--bench/ompbench.cxxlist7
-rw-r--r--bench/ompbenchmark.cpp81
8 files changed, 384 insertions, 104 deletions
diff --git a/bench/BenchSparseUtil.h b/bench/BenchSparseUtil.h
index 9d88148d0..2c24c29e6 100644
--- a/bench/BenchSparseUtil.h
+++ b/bench/BenchSparseUtil.h
@@ -3,8 +3,6 @@
#include <Eigen/Sparse>
#include <bench/BenchTimer.h>
-
-
using namespace std;
using namespace Eigen;
USING_PART_OF_NAMESPACE_EIGEN
diff --git a/bench/BenchUtil.h b/bench/BenchUtil.h
index bb3c4611c..4afe61980 100644
--- a/bench/BenchUtil.h
+++ b/bench/BenchUtil.h
@@ -26,3 +26,43 @@ template<typename MatrixType> void initMatrix_identity(MatrixType& mat)
{
mat.setIdentity();
}
+
+// DISABLE_SSE_EXCEPTIONS(): ORs 32832 (0x8040) into the SSE control/status
+// register (MXCSR), i.e. sets bit 15 (flush-to-zero) and bit 6
+// (denormals-are-zero), which gives more stable benchmark timings.
+// The macro expands to nothing when compiling with ICC.
+// `aux` is declared as a read-write memory operand ("+m") because stmxcsr and
+// orl both store into it, and the asm is volatile so that it is never dropped
+// or reordered as if it had no side effect.
+#ifndef __INTEL_COMPILER
+#define DISABLE_SSE_EXCEPTIONS() { \
+  int aux = 0; \
+  asm volatile( \
+  "stmxcsr %[aux] \n\t" \
+  "orl $32832, %[aux] \n\t" \
+  "ldmxcsr %[aux] \n\t" \
+  : [aux] "+m" (aux)); \
+}
+#else
+#define DISABLE_SSE_EXCEPTIONS()
+#endif
+
+#ifdef BENCH_GMM
+#include <gmm/gmm.h>
+/** Resizes \a dst to the dimensions of \a src, then copies every coefficient
+  * of \a src into \a dst, walking the source column by column. */
+template <typename EigenMatrixType, typename GmmMatrixType>
+void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst)
+{
+  dst.resize(src.rows(), src.cols());
+  for (int col = 0; col < src.cols(); ++col)
+  {
+    for (int row = 0; row < src.rows(); ++row)
+    {
+      dst(row, col) = src.coeff(row, col);
+    }
+  }
+}
+#endif
+
+
+#ifdef BENCH_GSL
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_linalg.h>
+#include <gsl/gsl_eigen.h>
+/** Copies every coefficient of \a src into the GSL matrix pointed to by
+  * \a *dst, walking the source column by column.
+  * The destination is not allocated or resized here. */
+template <typename EigenMatrixType>
+void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
+{
+  for (int col = 0; col < src.cols(); ++col)
+  {
+    for (int row = 0; row < src.rows(); ++row)
+    {
+      gsl_matrix_set(*dst, row, col, src.coeff(row, col));
+    }
+  }
+}
+#endif
+
diff --git a/bench/basicbenchmark.cpp b/bench/basicbenchmark.cpp
index 25101270e..bd500c06a 100644
--- a/bench/basicbenchmark.cpp
+++ b/bench/basicbenchmark.cpp
@@ -4,19 +4,7 @@
int main(int argc, char *argv[])
{
- // disable floating point exceptions
- // this leads to more stable bench results
- // (this is done by default by ICC)
- #ifndef __INTEL_COMPILER
- {
- int aux;
- asm(
- "stmxcsr %[aux] \n\t"
- "orl $32832, %[aux] \n\t"
- "ldmxcsr %[aux] \n\t"
- : : [aux] "m" (aux));
- }
- #endif
+ DISABLE_SSE_EXCEPTIONS();
// this is the list of matrix type and size we want to bench:
// ((suffix) (matrix size) (number of iterations))
diff --git a/bench/benchCholesky.cpp b/bench/benchCholesky.cpp
new file mode 100644
index 000000000..88b52eb4e
--- /dev/null
+++ b/bench/benchCholesky.cpp
@@ -0,0 +1,132 @@
+
+// g++ -DNDEBUG -O3 -I.. benchCholesky.cpp -o benchCholesky && ./benchCholesky
+// options:
+// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
+// -DEIGEN_DONT_VECTORIZE
+// -msse2
+// -DREPEAT=100
+// -DTRIES=10
+// -DSCALAR=double
+
+#include <Eigen/Array>
+#include <Eigen/Cholesky>
+#include <bench/BenchUtil.h>
+using namespace Eigen;
+
+// Base iteration budget; the per-size repeat count is derived from it
+// (override with -DREPEAT=...).
+#ifndef REPEAT
+#define REPEAT 10000
+#endif
+
+// Number of timed tries per decomposition (override with -DTRIES=...).
+#ifndef TRIES
+#define TRIES 4
+#endif
+
+// Scalar type of the benchmarked matrices (override with -DSCALAR=...,
+// as listed in the options at the top of this file).
+#ifndef SCALAR
+#define SCALAR float
+#endif
+
+typedef SCALAR Scalar;
+
+/** Benchmarks CholeskyWithoutSquareRoot and Cholesky on a matrix built as
+  * a * a.adjoint(), where \c a is a random matrix with the dimensions of \a m.
+  *
+  * Only the dimensions (and the type) of \a m are used. Each decomposition is
+  * run \c repeats times inside each of the TRIES timed tries, and one line is
+  * printed to std::cout: "dyn"/"fixed", the matrix size, and both timings
+  * scaled by REPEAT / repeats. When BENCH_GSL is defined and the matrix type
+  * is dynamic-sized, gsl_linalg_cholesky_decomp is timed as well.
+  */
+template <typename MatrixType>
+__attribute__ ((noinline)) void benchCholesky(const MatrixType& m)
+{
+  int rows = m.rows();
+  int cols = m.cols();
+
+  int repeats = (REPEAT*1000)/(rows*rows);
+  // A small -DREPEAT combined with a large matrix makes the integer division
+  // yield 0, which would time nothing and divide by zero in the report below.
+  if (repeats < 1)
+    repeats = 1;
+
+  typedef typename MatrixType::Scalar Scalar;
+  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
+
+  MatrixType a = MatrixType::random(rows,cols);
+  SquareMatrixType covMat = a * a.adjoint();
+
+  BenchTimer timerNoSqrt, timerSqrt;
+
+  // acc accumulates one coefficient of each result so that the decompositions
+  // cannot be optimized away; (r,c) is a random coefficient position.
+  Scalar acc = 0;
+  int r = ei_random<int>(0,covMat.rows()-1);
+  int c = ei_random<int>(0,covMat.cols()-1);
+  for (int t=0; t<TRIES; ++t)
+  {
+    timerNoSqrt.start();
+    for (int k=0; k<repeats; ++k)
+    {
+      CholeskyWithoutSquareRoot<SquareMatrixType> cholnosqrt(covMat);
+      acc += cholnosqrt.matrixL().coeff(r,c);
+    }
+    timerNoSqrt.stop();
+  }
+
+  for (int t=0; t<TRIES; ++t)
+  {
+    timerSqrt.start();
+    for (int k=0; k<repeats; ++k)
+    {
+      Cholesky<SquareMatrixType> chol(covMat);
+      acc += chol.matrixL().coeff(r,c);
+    }
+    timerSqrt.stop();
+  }
+
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+    std::cout << "dyn ";
+  else
+    std::cout << "fixed ";
+  std::cout << covMat.rows() << " \t"
+            << (timerNoSqrt.value() * REPEAT) / repeats << "s \t"
+            << (timerSqrt.value() * REPEAT) / repeats << "s";
+
+
+  #ifdef BENCH_GSL
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+  {
+    timerSqrt.reset();
+
+    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+
+    eiToGsl(covMat, &gslCovMat);
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerSqrt.start();
+      for (int k=0; k<repeats; ++k)
+      {
+        // the decomposition is done in place, hence the fresh copy each time
+        gsl_matrix_memcpy(gslCopy,gslCovMat);
+        gsl_linalg_cholesky_decomp(gslCopy);
+        acc += gsl_matrix_get(gslCopy,r,c);
+      }
+      timerSqrt.stop();
+    }
+
+    std::cout << " | \t"
+              << timerSqrt.value() * REPEAT / repeats << "s";
+
+    // release both GSL matrices allocated above
+    gsl_matrix_free(gslCovMat);
+    gsl_matrix_free(gslCopy);
+  }
+  #endif
+  std::cout << "\n";
+  // make sure the compiler does not optimize too much
+  if (acc==123)
+    std::cout << acc;
+}
+
+/** Prints the table header, then benchmarks the Cholesky decompositions for a
+  * list of dynamic sizes followed by a list of fixed sizes. */
+int main(int argc, char* argv[])
+{
+  // zero-terminated list of dynamic matrix sizes
+  const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};
+  std::cout << "size no sqrt standard";
+  #ifdef BENCH_GSL
+  std::cout << " GSL (standard + double + ATLAS) ";
+  #endif
+  std::cout << "\n";
+
+  // plain int index: `uint` is a non-standard typedef
+  for (int i=0; dynsizes[i]>0; ++i)
+    benchCholesky(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
+
+  benchCholesky(Matrix<Scalar,2,2>());
+  benchCholesky(Matrix<Scalar,3,3>());
+  benchCholesky(Matrix<Scalar,4,4>());
+  benchCholesky(Matrix<Scalar,5,5>());
+  benchCholesky(Matrix<Scalar,6,6>());
+  benchCholesky(Matrix<Scalar,7,7>());
+  benchCholesky(Matrix<Scalar,8,8>());
+  benchCholesky(Matrix<Scalar,12,12>());
+  benchCholesky(Matrix<Scalar,16,16>());
+  return 0;
+}
+
diff --git a/bench/benchEigenSolver.cpp b/bench/benchEigenSolver.cpp
new file mode 100644
index 000000000..a62ee41f4
--- /dev/null
+++ b/bench/benchEigenSolver.cpp
@@ -0,0 +1,210 @@
+
+// g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp -o benchEigenSolver && ./benchEigenSolver
+// options:
+// -DBENCH_GMM
+// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
+// -DEIGEN_DONT_VECTORIZE
+// -msse2
+// -DREPEAT=100
+// -DTRIES=10
+// -DSCALAR=double
+
+#include <Eigen/Array>
+#include <Eigen/QR>
+#include <bench/BenchUtil.h>
+using namespace Eigen;
+
+#ifndef REPEAT // base iteration budget, overridable with -DREPEAT=...
+#define REPEAT 1000
+#endif
+
+#ifndef TRIES // number of timed tries per solver, overridable with -DTRIES=...
+#define TRIES 4
+#endif
+
+#ifndef SCALAR // scalar type of the benchmarked matrices, overridable with -DSCALAR=...
+#define SCALAR float
+#endif
+
+typedef SCALAR Scalar; // file-wide scalar type used by main()
+
+/** Benchmarks SelfAdjointEigenSolver and EigenSolver on a matrix built as
+  * a * a.adjoint(), where \c a is a random matrix with the dimensions of \a m.
+  *
+  * Only the dimensions (and the type) of \a m are used. The generic solver is
+  * run \c stdRepeats times per try and the self-adjoint one four times as
+  * often; both timings are printed scaled by REPEAT / <repeat count>.
+  * For dynamic-sized matrix types, GMM++ (if BENCH_GMM is defined) and GSL
+  * (if BENCH_GSL is defined) are timed on the same matrix and appended to the
+  * same output line.
+  */
+template <typename MatrixType>
+__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
+{
+  int rows = m.rows();
+  int cols = m.cols();
+
+  // at least one iteration, whatever REPEAT and the matrix size are;
+  // the explicit double avoids calling sqrt with an integer argument
+  int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(double(rows)))));
+  int saRepeats = stdRepeats * 4;
+
+  typedef typename MatrixType::Scalar Scalar;
+  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
+
+  MatrixType a = MatrixType::random(rows,cols);
+  SquareMatrixType covMat = a * a.adjoint();
+
+  BenchTimer timerSa, timerStd;
+
+  // acc accumulates one coefficient of each result so that the computations
+  // cannot be optimized away; (r,c) is a random coefficient position.
+  Scalar acc = 0;
+  int r = ei_random<int>(0,covMat.rows()-1);
+  int c = ei_random<int>(0,covMat.cols()-1);
+  {
+    SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerSa.start();
+      for (int k=0; k<saRepeats; ++k)
+      {
+        ei.compute(covMat);
+        acc += ei.eigenvectors().coeff(r,c);
+      }
+      timerSa.stop();
+    }
+  }
+
+  {
+    EigenSolver<SquareMatrixType> ei(covMat);
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerStd.start();
+      for (int k=0; k<stdRepeats; ++k)
+      {
+        ei.compute(covMat);
+        acc += ei.eigenvectors().coeff(r,c);
+      }
+      timerStd.stop();
+    }
+  }
+
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+    std::cout << "dyn ";
+  else
+    std::cout << "fixed ";
+  std::cout << covMat.rows() << " \t"
+            << timerSa.value() * REPEAT / saRepeats << "s \t"
+            << timerStd.value() * REPEAT / stdRepeats << "s";
+
+  #ifdef BENCH_GMM
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+  {
+    timerSa.reset();
+    timerStd.reset();
+
+    gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(),covMat.cols());
+    gmm::dense_matrix<Scalar> eigvect(covMat.rows(),covMat.cols());
+    std::vector<Scalar> eigval(covMat.rows());
+    eiToGmm(covMat, gmmCovMat);
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerSa.start();
+      for (int k=0; k<saRepeats; ++k)
+      {
+        gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
+        acc += eigvect(r,c);
+      }
+      timerSa.stop();
+    }
+    // the non-selfadjoint solver does not compute the eigen vectors
+// for (int t=0; t<TRIES; ++t)
+// {
+// timerStd.start();
+// for (int k=0; k<stdRepeats; ++k)
+// {
+// gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
+// acc += eigvect(r,c);
+// }
+// timerStd.stop();
+// }
+
+    std::cout << " | \t"
+              << timerSa.value() * REPEAT / saRepeats << "s"
+              << /*timerStd.value() * REPEAT / stdRepeats << "s"*/ " na ";
+  }
+  #endif
+
+  #ifdef BENCH_GSL
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+  {
+    timerSa.reset();
+    timerStd.reset();
+
+    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+    gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+    gsl_vector* eigval = gsl_vector_alloc(covMat.rows());
+    gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows());
+
+    gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(),covMat.cols());
+    gsl_vector_complex* eigvalz = gsl_vector_complex_alloc(covMat.rows());
+    gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());
+
+    eiToGsl(covMat, &gslCovMat);
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerSa.start();
+      for (int k=0; k<saRepeats; ++k)
+      {
+        // the solver gets a fresh copy of the input on every iteration
+        gsl_matrix_memcpy(gslCopy,gslCovMat);
+        gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
+        acc += gsl_matrix_get(eigvect,r,c);
+      }
+      timerSa.stop();
+    }
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerStd.start();
+      for (int k=0; k<stdRepeats; ++k)
+      {
+        gsl_matrix_memcpy(gslCopy,gslCovMat);
+        gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
+        acc += GSL_REAL(gsl_matrix_complex_get(eigvectz,r,c));
+      }
+      timerStd.stop();
+    }
+
+    std::cout << " | \t"
+              << timerSa.value() * REPEAT / saRepeats << "s \t"
+              << timerStd.value() * REPEAT / stdRepeats << "s";
+
+    gsl_matrix_free(gslCovMat);
+    // gslCopy is a gsl_matrix, so it must be released with gsl_matrix_free
+    gsl_matrix_free(gslCopy);
+    gsl_matrix_free(eigvect);
+    gsl_vector_free(eigval);
+    gsl_matrix_complex_free(eigvectz);
+    gsl_vector_complex_free(eigvalz);
+    gsl_eigen_symmv_free(eisymm);
+    gsl_eigen_nonsymmv_free(einonsymm);
+  }
+  #endif
+
+  std::cout << "\n";
+
+  // make sure the compiler does not optimize too much
+  if (acc==123)
+    std::cout << acc;
+}
+
+/** Prints the table header, then benchmarks the eigen solvers for a list of
+  * dynamic sizes followed by a list of fixed sizes. */
+int main(int argc, char* argv[])
+{
+  // zero-terminated list of dynamic matrix sizes
+  const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};
+  std::cout << "size selfadjoint generic";
+  #ifdef BENCH_GMM
+  std::cout << " GMM++ ";
+  #endif
+  #ifdef BENCH_GSL
+  std::cout << " GSL (double + ATLAS) ";
+  #endif
+  std::cout << "\n";
+  // plain int index: `uint` is a non-standard typedef
+  for (int i=0; dynsizes[i]>0; ++i)
+    benchEigenSolver(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
+
+  benchEigenSolver(Matrix<Scalar,2,2>());
+  benchEigenSolver(Matrix<Scalar,3,3>());
+  benchEigenSolver(Matrix<Scalar,4,4>());
+  benchEigenSolver(Matrix<Scalar,6,6>());
+  benchEigenSolver(Matrix<Scalar,8,8>());
+  benchEigenSolver(Matrix<Scalar,12,12>());
+  benchEigenSolver(Matrix<Scalar,16,16>());
+  return 0;
+}
+
diff --git a/bench/bench_unrolling b/bench/bench_unrolling
index 7934031a8..bf01cce7d 100755
--- a/bench/bench_unrolling
+++ b/bench/bench_unrolling
@@ -5,7 +5,7 @@
for ((i=1; i<16; ++i)); do
echo "Matrix size: $i x $i :"
- $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=25 -o benchmark && time ./benchmark >/dev/null
+ $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null
$CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null
echo " "
done
diff --git a/bench/ompbench.cxxlist b/bench/ompbench.cxxlist
deleted file mode 100644
index fc6681d33..000000000
--- a/bench/ompbench.cxxlist
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-
-CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
-
-# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
-
-CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp" \ No newline at end of file
diff --git a/bench/ompbenchmark.cpp b/bench/ompbenchmark.cpp
deleted file mode 100644
index ac5155cb8..000000000
--- a/bench/ompbenchmark.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-// g++ -O3 -DNDEBUG -I.. -fopenmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
-// icpc -fast -fno-exceptions -DNDEBUG -I.. -openmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
-
-#include <omp.h>
-#include "BenchUtil.h"
-#include "basicbenchmark.h"
-
-// #include <Eigen/Core>
-// #include "BenchTimer.h"
-//
-// using namespace std;
-// USING_PART_OF_NAMESPACE_EIGEN
-//
-// enum {LazyEval, EarlyEval, OmpEval};
-//
-// template<int Mode, typename MatrixType>
-// double benchSingleProc(const MatrixType& mat, int iterations, int tries) __attribute__((noinline));
-//
-// template<int Mode, typename MatrixType>
-// double benchBasic(const MatrixType& mat, int iterations, int tries)
-// {
-// const int rows = mat.rows();
-// const int cols = mat.cols();
-//
-// Eigen::BenchTimer timer;
-// for(uint t=0; t<tries; ++t)
-// {
-// MatrixType I = MatrixType::identity(rows, cols);
-// MatrixType m = MatrixType::random(rows, cols);
-//
-// timer.start();
-// for(int a = 0; a < iterations; a++)
-// {
-// if(Mode==LazyEval)
-// m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
-// else if(Mode==OmpEval)
-// m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP();
-// else
-// m = I + 0.00005 * (m + m * m);
-// }
-// timer.stop();
-// cerr << m;
-// }
-// return timer.value();
-// };
-
-int main(int argc, char *argv[])
-{
- // disable floating point exceptions
- // this leads to more stable bench results
- {
- int aux;
- asm(
- "stmxcsr %[aux] \n\t"
- "orl $32832, %[aux] \n\t"
- "ldmxcsr %[aux] \n\t"
- : : [aux] "m" (aux));
- }
-
- // commented since the default setting is use as many threads as processors
- //omp_set_num_threads(omp_get_num_procs());
-
- std::cout << "double, fixed-size 4x4: "
- << benchBasic<LazyEval>(Matrix4d(), 10000, 10) << "s "
- << benchBasic<OmpEval>(Matrix4d(), 10000, 10) << "s \n";
-
- #define BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES) {\
- double single = benchBasic<LazyEval>(Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
- double omp = benchBasic<OmpEval> (Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
- std::cout << #TYPE << ", " << #SIZE << "x" << #SIZE << ": " << single << "s " << omp << "s " \
- << " => x" << single/omp << " (" << omp_get_num_procs() << ")" << std::endl; \
- }
-
- BENCH_MATRIX(double, 32, 1000, 10);
- BENCH_MATRIX(double, 128, 10, 10);
- BENCH_MATRIX(double, 512, 1, 6);
- BENCH_MATRIX(double, 1024, 1, 4);
-
- return 0;
-}
-