diff options
author | Gael Guennebaud <g.gael@free.fr> | 2014-04-17 21:03:26 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2014-04-17 21:03:26 +0200 |
commit | c354bd47f7001bd6b3c43fc4a4b5d27f764aa5c3 (patch) | |
tree | a414e46d391b89d78ffe5203733c5b6710df962a /bench | |
parent | 9777a5ca60f0a82bb789f55912fd046ab7f3d15d (diff) |
Make our gemm bench a little more powerful.
Diffstat (limited to 'bench')
-rw-r--r-- | bench/bench_gemm.cpp | 100 |
1 file changed, 76 insertions, 24 deletions
diff --git a/bench/bench_gemm.cpp b/bench/bench_gemm.cpp index 1ef2e72c2..8222271fb 100644 --- a/bench/bench_gemm.cpp +++ b/bench/bench_gemm.cpp @@ -2,6 +2,14 @@ // g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2 ./a.out // icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp && OMP_NUM_THREADS=2 ./a.out +// Compilation options: +// +// -DSCALAR=std::complex<double> +// -DSCALARA=double or -DSCALARB=double +// -DHAVE_BLAS +// -DDECOUPLED +// + #include <iostream> #include <Eigen/Core> #include <bench/BenchTimer.h> @@ -14,10 +22,18 @@ using namespace Eigen; #define SCALAR float #endif +#ifndef SCALARA +#define SCALARA SCALAR +#endif + +#ifndef SCALARB +#define SCALARB SCALAR +#endif + typedef SCALAR Scalar; typedef NumTraits<Scalar>::Real RealScalar; -typedef Matrix<RealScalar,Dynamic,Dynamic> A; -typedef Matrix</*Real*/Scalar,Dynamic,Dynamic> B; +typedef Matrix<SCALARA,Dynamic,Dynamic> A; +typedef Matrix<SCALARB,Dynamic,Dynamic> B; typedef Matrix<Scalar,Dynamic,Dynamic> C; typedef Matrix<RealScalar,Dynamic,Dynamic> M; @@ -135,32 +151,49 @@ int main(int argc, char ** argv) int cache_size = -1; bool need_help = false; - for (int i=1; i<argc; ++i) + for (int i=1; i<argc;) { - if(argv[i][0]=='s') - { - s = atoi(argv[i]+1); - m = n = p = s; - } - else if(argv[i][0]=='m') + if(argv[i][0]=='-') { - m = atoi(argv[++i]); - n = atoi(argv[++i]); - p = atoi(argv[++i]); + if(argv[i][1]=='s') + { + ++i; + s = atoi(argv[i++]); + m = n = p = s; + if(argv[i][0]!='-') + { + n = atoi(argv[i++]); + p = atoi(argv[i++]); + } + } + else if(argv[i][1]=='c') + { + ++i; + cache_size = atoi(argv[i++]); + } + else if(argv[i][1]=='t') + { + ++i; + tries = atoi(argv[i++]); + } + else if(argv[i][1]=='p') + { + ++i; + rep = atoi(argv[i++]); + } } - else if(argv[i][0]=='c') - cache_size = atoi(argv[i]+1); - else if(argv[i][0]=='t') - tries = atoi(argv[i]+1); - else if(argv[i][0]=='p') - rep = atoi(argv[i]+1); else + { need_help = true; + break; + } } if(need_help) { - 
std::cout << argv[0] << " s<matrix size> c<cache size> t<nb tries> p<nb repeats>\n"; + std::cout << argv[0] << " -s <matrix sizes> -c <cache size> -t <nb tries> -p <nb repeats>\n"; + std::cout << " <matrix sizes> : size\n"; + std::cout << " <matrix sizes> : rows columns depth\n"; return 1; } @@ -182,6 +215,7 @@ int main(int argc, char ** argv) // check the parallel product is correct #if defined EIGEN_HAS_OPENMP + Eigen::initParallel(); int procs = omp_get_max_threads(); if(procs>1) { @@ -198,11 +232,20 @@ int main(int argc, char ** argv) #elif defined HAVE_BLAS blas_gemm(a,b,r); c.noalias() += a * b; - if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n"; + if(!r.isApprox(c)) { + std::cout << r - c << "\n"; + std::cerr << "Warning, your product is crap!\n\n"; + } #else - gemm(a,b,c); - r.noalias() += a.cast<Scalar>() * b.cast<Scalar>(); - if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n"; + if(1.*m*n*p<2000.*2000*2000) + { + gemm(a,b,c); + r.noalias() += a.cast<Scalar>() .lazyProduct( b.cast<Scalar>() ); + if(!r.isApprox(c)) { + std::cout << r - c << "\n"; + std::cerr << "Warning, your product is crap!\n\n"; + } + } #endif #ifdef HAVE_BLAS @@ -224,7 +267,7 @@ int main(int argc, char ** argv) { BenchTimer tmono; omp_set_num_threads(1); - Eigen::internal::setNbThreads(1); + Eigen::setNbThreads(1); c = rc; BENCH(tmono, tries, rep, gemm(a,b,c)); std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n"; @@ -233,6 +276,15 @@ int main(int argc, char ** argv) } #endif + if(1.*m*n*p<30*30*30) + { + BenchTimer tmt; + c = rc; + BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b)); + std::cout << "lazy cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n"; + std::cout << "lazy real " << tmt.best(REAL_TIMER)/rep << "s \t" << 
(double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n"; + } + #ifdef DECOUPLED if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) { |