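Make our gemm bench a little more powerful: per-operand scalar types (SCALARA/SCALARB), dash-style command-line options (-s/-c/-t/-p) with optional separate rows/columns/depth sizes, and a lazy-product timing for small problem sizes.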

author: Gael Guennebaud <g.gael@free.fr> 2014-04-17 21:03:26 +0200
committer: Gael Guennebaud <g.gael@free.fr> 2014-04-17 21:03:26 +0200
commit: c354bd47f7001bd6b3c43fc4a4b5d27f764aa5c3 (patch)
tree: a414e46d391b89d78ffe5203733c5b6710df962a /bench/bench_gemm.cpp
parent: 9777a5ca60f0a82bb789f55912fd046ab7f3d15d (diff)
1 file changed, 76 insertions, 24 deletions
diff --git a/bench/bench_gemm.cpp b/bench/bench_gemm.cpp
index 1ef2e72c2..8222271fb 100644
--- a/bench/bench_gemm.cpp
+++ b/bench/bench_gemm.cpp
@@ -2,6 +2,14 @@
 // g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2  ./a.out
 // icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp  && OMP_NUM_THREADS=2  ./a.out
 
+// Compilation options:
+// 
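+// -DSCALAR=std::complex<double>          scalar type of the result matrix C (default: float)
+// -DSCALARA=double or -DSCALARB=double   scalar type of matrix A / matrix B (each defaults to SCALAR)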
+// -DHAVE_BLAS
+// -DDECOUPLED
+//
+
 #include <iostream>
 #include <Eigen/Core>
 #include <bench/BenchTimer.h>
@@ -14,10 +22,18 @@ using namespace Eigen;
 #define SCALAR float
 #endif
 
+#ifndef SCALARA
+#define SCALARA SCALAR
+#endif
+
+#ifndef SCALARB
+#define SCALARB SCALAR
+#endif
+
 typedef SCALAR Scalar;
 typedef NumTraits<Scalar>::Real RealScalar;
-typedef Matrix<RealScalar,Dynamic,Dynamic> A;
-typedef Matrix</*Real*/Scalar,Dynamic,Dynamic> B;
+typedef Matrix<SCALARA,Dynamic,Dynamic> A;
+typedef Matrix<SCALARB,Dynamic,Dynamic> B;
 typedef Matrix<Scalar,Dynamic,Dynamic> C;
 typedef Matrix<RealScalar,Dynamic,Dynamic> M;
 
@@ -135,32 +151,49 @@ int main(int argc, char ** argv)
   int cache_size = -1;
 
   bool need_help = false;
-  for (int i=1; i<argc; ++i)
+  for (int i=1; i<argc;)
   {
-    if(argv[i][0]=='s')
-    {
-      s = atoi(argv[i]+1);
-      m = n = p = s;
-    }
-    else if(argv[i][0]=='m')
+    if(argv[i][0]=='-')
     {
-       m = atoi(argv[++i]);
-       n = atoi(argv[++i]);
-       p = atoi(argv[++i]);
+      if(argv[i][1]=='s')
+      {
+        ++i;
+        s = atoi(argv[i++]);
+        m = n = p = s;
+        if(argv[i][0]!='-')
+        {
+          n = atoi(argv[i++]);
+          p = atoi(argv[i++]);
+        }
+      }
+      else if(argv[i][1]=='c')
+      {
+        ++i;
+        cache_size = atoi(argv[i++]);
+      }
+      else if(argv[i][1]=='t')
+      {
+        ++i;
+        tries = atoi(argv[i++]);
+      }
+      else if(argv[i][1]=='p')
+      {
+        ++i;
+        rep = atoi(argv[i++]);
+      }
     }
-    else if(argv[i][0]=='c')
-      cache_size = atoi(argv[i]+1);
-    else if(argv[i][0]=='t')
-      tries = atoi(argv[i]+1);
-    else if(argv[i][0]=='p')
-      rep = atoi(argv[i]+1);
     else
+    {
       need_help = true;
+      break;
+    }
   }
 
   if(need_help)
   {
-    std::cout << argv[0] << " s<matrix size> c<cache size> t<nb tries> p<nb repeats>\n";
+    std::cout << argv[0] << " -s <matrix sizes> -c <cache size> -t <nb tries> -p <nb repeats>\n";
+    std::cout << "   <matrix sizes> : size\n";
+    std::cout << "   <matrix sizes> : rows columns depth\n";
     return 1;
   }
 
@@ -182,6 +215,7 @@ int main(int argc, char ** argv)
 
   // check the parallel product is correct
   #if defined EIGEN_HAS_OPENMP
+  Eigen::initParallel();
   int procs = omp_get_max_threads();
   if(procs>1)
   {
@@ -198,11 +232,20 @@ int main(int argc, char ** argv)
   #elif defined HAVE_BLAS
     blas_gemm(a,b,r);
     c.noalias() += a * b;
-    if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n";
+    if(!r.isApprox(c)) {
+      std::cout << r  - c << "\n";
+      std::cerr << "Warning, your product is crap!\n\n";
+    }
   #else
-    gemm(a,b,c);
-    r.noalias() += a.cast<Scalar>() * b.cast<Scalar>();
-    if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n";
+    if(1.*m*n*p<2000.*2000*2000)
+    {
+      gemm(a,b,c);
+      r.noalias() += a.cast<Scalar>() .lazyProduct( b.cast<Scalar>() );
+      if(!r.isApprox(c)) {
+        std::cout << r - c << "\n";
+        std::cerr << "Warning, your product is crap!\n\n";
+      }
+    }
   #endif
 
   #ifdef HAVE_BLAS
@@ -224,7 +267,7 @@ int main(int argc, char ** argv)
   {
     BenchTimer tmono;
     omp_set_num_threads(1);
-    Eigen::internal::setNbThreads(1);
+    Eigen::setNbThreads(1);
     c = rc;
     BENCH(tmono, tries, rep, gemm(a,b,c));
     std::cout << "eigen mono cpu    " << tmono.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmono.total(CPU_TIMER)  << "s)\n";
@@ -233,6 +276,15 @@ int main(int argc, char ** argv)
   }
   #endif
   
+  if(1.*m*n*p<30*30*30)
+  {
+      BenchTimer tmt;
+      c = rc;
+      BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b));
+      std::cout << "lazy cpu         " << tmt.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmt.total(CPU_TIMER)  << "s)\n";
+      std::cout << "lazy real        " << tmt.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
+  }
+  
   #ifdef DECOUPLED
   if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
   {
author	Gael Guennebaud <g.gael@free.fr>	2014-04-17 21:03:26 +0200
committer	Gael Guennebaud <g.gael@free.fr>	2014-04-17 21:03:26 +0200
commit	c354bd47f7001bd6b3c43fc4a4b5d27f764aa5c3 (patch)
tree	a414e46d391b89d78ffe5203733c5b6710df962a /bench/bench_gemm.cpp
parent	9777a5ca60f0a82bb789f55912fd046ab7f3d15d (diff)