From 4cd38b333cb34ea5f8cbaf5a1e8471311f0cd1fa Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 21 Jun 2010 12:07:05 +0200
Subject: make bench_gemm print out the queried cache sizes

---
 Eigen/src/Core/util/Memory.h |  4 ++--
 bench/bench_gemm.cpp         | 14 +++++++++-----
 2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 1b1ef6712..810bbfacd 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -601,7 +601,7 @@ public:
 
 /** \internal
  * \returns the size in Bytes of the L1 data cache */
-inline std::ptrdiff_t ei_fetchL1CacheSize()
+inline std::ptrdiff_t ei_queryL1CacheSize()
 {
   int abcd[4];
 
@@ -644,7 +644,7 @@ inline std::ptrdiff_t ei_fetchL1CacheSize()
 
 /** \internal
  * \returns the size in Bytes of the L2 or L3 cache if this later is present */
-inline std::ptrdiff_t ei_fetchTopLevelCacheSize()
+inline std::ptrdiff_t ei_queryTopLevelCacheSize()
 {
   int abcd[4];
   EIGEN_CPUID(abcd,0x80000006);
diff --git a/bench/bench_gemm.cpp b/bench/bench_gemm.cpp
index 7b12736da..ee34e6ddc 100644
--- a/bench/bench_gemm.cpp
+++ b/bench/bench_gemm.cpp
@@ -57,6 +57,7 @@ void blas_gemm(const MatrixXd& a, const MatrixXd& b, MatrixXd& c)
 
 #endif
 
+template<typename M>
 void gemm(const M& a, const M& b, M& c)
 {
   c.noalias() += a * b;
@@ -64,6 +65,9 @@ void gemm(const M& a, const M& b, M& c)
 
 int main(int argc, char ** argv)
 {
+  std::cout << "L1 cache size    = " << ei_queryL1CacheSize()/1024 << " KB\n";
+  std::cout << "L2/L3 cache size = " << ei_queryTopLevelCacheSize()/1024 << " KB\n";  
+  
   int rep = 1;    // number of repetitions per try
   int tries = 5;  // number of tries, we keep the best
 
@@ -90,11 +94,6 @@ int main(int argc, char ** argv)
   if(cache_size>0)
     setCpuCacheSizes(cache_size,32*cache_size);
 
-  std::cout << "Matrix size = " << s << "\n";
-  std::ptrdiff_t cm, cn, ck;
-  getBlockingSizes<Scalar>(ck, cm, cn);
-  std::cout << "blocking size = " << cm << " x " << ck << "\n";
-
   int m = s;
   int n = s;
   int p = s;
@@ -102,6 +101,11 @@ int main(int argc, char ** argv)
   M b(n,p); b.setRandom();
   M c(m,p); c.setOnes();
 
+  std::cout << "Matrix sizes = " << m << "x" << n << " * " << n << "x" << p << "\n";
+  std::ptrdiff_t cm, cn, ck;
+  getBlockingSizes<Scalar>(ck, cm, cn);
+  std::cout << "blocking size = " << cm << " x " << ck << "\n";
+
   M r = c;
 
   // check the parallel product is correct
-- 
cgit v1.2.3