about | summary | refs | log | tree | commit | diff | homepage
path: root/bench
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2008-08-09 20:06:25 +0000
committer: Gael Guennebaud <g.gael@free.fr> 2008-08-09 20:06:25 +0000
commit: b13148c3588c2be8805630c117cdc2b4cfb00576 (patch)
tree: fff72a7f4503c1b2dbfaab64db0260661707013d /bench
parent: d6e88f81551d5d9c1b66f4fc13d2f2211cb689ff (diff)
renamed inverseProduct => solveTriangular
Diffstat (limited to 'bench')
-rw-r--r-- bench/benchCholesky.cpp 14
-rw-r--r-- bench/benchVecAdd.cpp 86
-rw-r--r-- bench/btl/libs/blitz/btl_tiny_blitz.cpp 1
-rw-r--r-- bench/btl/libs/eigen2/eigen2_interface.hh 3
4 files changed, 56 insertions, 48 deletions
diff --git a/bench/benchCholesky.cpp b/bench/benchCholesky.cpp
index dd0c7f83b..f64b61b71 100644
--- a/bench/benchCholesky.cpp
+++ b/bench/benchCholesky.cpp
@@ -18,7 +18,7 @@ using namespace Eigen;
#endif
#ifndef TRIES
-#define TRIES 4
+#define TRIES 10
#endif
typedef float Scalar;
@@ -29,6 +29,13 @@ __attribute__ ((noinline)) void benchCholesky(const MatrixType& m)
int rows = m.rows();
int cols = m.cols();
+ int cost = 0;
+ for (int j=0; j<rows; ++j)
+ {
+ int r = std::max(rows - j -1,0);
+ cost += 2*(r*j+r+j);
+ }
+
int repeats = (REPEAT*1000)/(rows*rows);
typedef typename MatrixType::Scalar Scalar;
@@ -70,7 +77,8 @@ __attribute__ ((noinline)) void benchCholesky(const MatrixType& m)
std::cout << "fixed ";
std::cout << covMat.rows() << " \t"
<< (timerNoSqrt.value() * REPEAT) / repeats << "s \t"
- << (timerSqrt.value() * REPEAT) / repeats << "s";
+ << (timerSqrt.value() * REPEAT) / repeats << "s "
+ << "(" << 1e-6 * cost*repeats/timerSqrt.value() << " MFLOPS)\n";
#ifdef BENCH_GSL
@@ -108,7 +116,7 @@ __attribute__ ((noinline)) void benchCholesky(const MatrixType& m)
int main(int argc, char* argv[])
{
- const int dynsizes[] = {/*4,6,8,12,16,24,32,49,64,67,128,129,130,131,132,*/256,257,258,259,260,512,0};
+ const int dynsizes[] = {/*4,6,8,12,16,24,32,49,64,67,128,129,130,131,132,*/256,257,258,259,260,512,900,0};
std::cout << "size no sqrt standard";
#ifdef BENCH_GSL
std::cout << " GSL (standard + double + ATLAS) ";
diff --git a/bench/benchVecAdd.cpp b/bench/benchVecAdd.cpp
index aa211dce0..ef85b3ced 100644
--- a/bench/benchVecAdd.cpp
+++ b/bench/benchVecAdd.cpp
@@ -22,7 +22,7 @@ int main(int argc, char* argv[])
int size = SIZE * 8;
int size2 = size * size;
Scalar* a = ei_aligned_malloc<Scalar>(size2);
- Scalar* b = ei_aligned_malloc<Scalar>(size2);
+ Scalar* b = ei_aligned_malloc<Scalar>(size2+4)+1;
Scalar* c = ei_aligned_malloc<Scalar>(size2);
for (int i=0; i<size; ++i)
@@ -33,22 +33,22 @@ int main(int argc, char* argv[])
BenchTimer timer;
timer.reset();
- for (int k=0; k<3; ++k)
+ for (int k=0; k<10; ++k)
{
timer.start();
benchVec(a, b, c, size2);
timer.stop();
}
std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
-
+ return 0;
for (int innersize = size; innersize>2 ; --innersize)
{
if (size2%innersize==0)
{
int outersize = size2/innersize;
- MatrixXf ma = MatrixXf::map(a, innersize, outersize );
- MatrixXf mb = MatrixXf::map(b, innersize, outersize );
- MatrixXf mc = MatrixXf::map(c, innersize, outersize );
+ MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
+ MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
+ MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
timer.reset();
for (int k=0; k<3; ++k)
{
@@ -60,9 +60,9 @@ int main(int argc, char* argv[])
}
}
- VectorXf va = VectorXf::map(a, size2);
- VectorXf vb = VectorXf::map(b, size2);
- VectorXf vc = VectorXf::map(c, size2);
+ VectorXf va = Map<VectorXf>(a, size2);
+ VectorXf vb = Map<VectorXf>(b, size2);
+ VectorXf vc = Map<VectorXf>(c, size2);
timer.reset();
for (int k=0; k<3; ++k)
{
@@ -95,40 +95,40 @@ void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
for (int k=0; k<REPEAT; ++k)
for (int i=0; i<size; i+=PacketSize*8)
{
- a0 = ei_pload(&a[i]);
- b0 = ei_pload(&b[i]);
- a1 = ei_pload(&a[i+1*PacketSize]);
- b1 = ei_pload(&b[i+1*PacketSize]);
- a2 = ei_pload(&a[i+2*PacketSize]);
- b2 = ei_pload(&b[i+2*PacketSize]);
- a3 = ei_pload(&a[i+3*PacketSize]);
- b3 = ei_pload(&b[i+3*PacketSize]);
- ei_pstore(&a[i], ei_padd(a0, b0));
- a0 = ei_pload(&a[i+4*PacketSize]);
- b0 = ei_pload(&b[i+4*PacketSize]);
-
- ei_pstore(&a[i+1*PacketSize], ei_padd(a1, b1));
- a1 = ei_pload(&a[i+5*PacketSize]);
- b1 = ei_pload(&b[i+5*PacketSize]);
-
- ei_pstore(&a[i+2*PacketSize], ei_padd(a2, b2));
- a2 = ei_pload(&a[i+6*PacketSize]);
- b2 = ei_pload(&b[i+6*PacketSize]);
-
- ei_pstore(&a[i+3*PacketSize], ei_padd(a3, b3));
- a3 = ei_pload(&a[i+7*PacketSize]);
- b3 = ei_pload(&b[i+7*PacketSize]);
-
- ei_pstore(&a[i+4*PacketSize], ei_padd(a0, b0));
- ei_pstore(&a[i+5*PacketSize], ei_padd(a1, b1));
- ei_pstore(&a[i+6*PacketSize], ei_padd(a2, b2));
- ei_pstore(&a[i+7*PacketSize], ei_padd(a3, b3));
+// a0 = ei_pload(&a[i]);
+// b0 = ei_pload(&b[i]);
+// a1 = ei_pload(&a[i+1*PacketSize]);
+// b1 = ei_pload(&b[i+1*PacketSize]);
+// a2 = ei_pload(&a[i+2*PacketSize]);
+// b2 = ei_pload(&b[i+2*PacketSize]);
+// a3 = ei_pload(&a[i+3*PacketSize]);
+// b3 = ei_pload(&b[i+3*PacketSize]);
+// ei_pstore(&a[i], ei_padd(a0, b0));
+// a0 = ei_pload(&a[i+4*PacketSize]);
+// b0 = ei_pload(&b[i+4*PacketSize]);
+//
+// ei_pstore(&a[i+1*PacketSize], ei_padd(a1, b1));
+// a1 = ei_pload(&a[i+5*PacketSize]);
+// b1 = ei_pload(&b[i+5*PacketSize]);
+//
+// ei_pstore(&a[i+2*PacketSize], ei_padd(a2, b2));
+// a2 = ei_pload(&a[i+6*PacketSize]);
+// b2 = ei_pload(&b[i+6*PacketSize]);
+//
+// ei_pstore(&a[i+3*PacketSize], ei_padd(a3, b3));
+// a3 = ei_pload(&a[i+7*PacketSize]);
+// b3 = ei_pload(&b[i+7*PacketSize]);
+//
+// ei_pstore(&a[i+4*PacketSize], ei_padd(a0, b0));
+// ei_pstore(&a[i+5*PacketSize], ei_padd(a1, b1));
+// ei_pstore(&a[i+6*PacketSize], ei_padd(a2, b2));
+// ei_pstore(&a[i+7*PacketSize], ei_padd(a3, b3));
-// ei_pstore(&a[i+2*PacketSize], ei_padd(ei_pload(&a[i+2*PacketSize]), ei_pload(&b[i+2*PacketSize])));
-// ei_pstore(&a[i+3*PacketSize], ei_padd(ei_pload(&a[i+3*PacketSize]), ei_pload(&b[i+3*PacketSize])));
-// ei_pstore(&a[i+4*PacketSize], ei_padd(ei_pload(&a[i+4*PacketSize]), ei_pload(&b[i+4*PacketSize])));
-// ei_pstore(&a[i+5*PacketSize], ei_padd(ei_pload(&a[i+5*PacketSize]), ei_pload(&b[i+5*PacketSize])));
-// ei_pstore(&a[i+6*PacketSize], ei_padd(ei_pload(&a[i+6*PacketSize]), ei_pload(&b[i+6*PacketSize])));
-// ei_pstore(&a[i+7*PacketSize], ei_padd(ei_pload(&a[i+7*PacketSize]), ei_pload(&b[i+7*PacketSize])));
+ ei_pstore(&a[i+2*PacketSize], ei_padd(ei_ploadu(&a[i+2*PacketSize]), ei_ploadu(&b[i+2*PacketSize])));
+ ei_pstore(&a[i+3*PacketSize], ei_padd(ei_ploadu(&a[i+3*PacketSize]), ei_ploadu(&b[i+3*PacketSize])));
+ ei_pstore(&a[i+4*PacketSize], ei_padd(ei_ploadu(&a[i+4*PacketSize]), ei_ploadu(&b[i+4*PacketSize])));
+ ei_pstore(&a[i+5*PacketSize], ei_padd(ei_ploadu(&a[i+5*PacketSize]), ei_ploadu(&b[i+5*PacketSize])));
+ ei_pstore(&a[i+6*PacketSize], ei_padd(ei_ploadu(&a[i+6*PacketSize]), ei_ploadu(&b[i+6*PacketSize])));
+ ei_pstore(&a[i+7*PacketSize], ei_padd(ei_ploadu(&a[i+7*PacketSize]), ei_ploadu(&b[i+7*PacketSize])));
}
}
diff --git a/bench/btl/libs/blitz/btl_tiny_blitz.cpp b/bench/btl/libs/blitz/btl_tiny_blitz.cpp
index eff3c1881..9fddde752 100644
--- a/bench/btl/libs/blitz/btl_tiny_blitz.cpp
+++ b/bench/btl/libs/blitz/btl_tiny_blitz.cpp
@@ -23,7 +23,6 @@
#include "action_matrix_vector_product.hh"
#include "action_matrix_matrix_product.hh"
#include "action_axpy.hh"
-#include "timers/x86_perf_analyzer.hh"
BTL_MAIN;
diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh
index 64314ab3b..f3ba5e8af 100644
--- a/bench/btl/libs/eigen2/eigen2_interface.hh
+++ b/bench/btl/libs/eigen2/eigen2_interface.hh
@@ -134,7 +134,7 @@ public :
}
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int N){
- X = L.template marked<Lower>().inverseProduct(B);
+ X = L.template marked<Lower>().solveTriangular(B);
}
static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
@@ -146,6 +146,7 @@ public :
static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
C = X.lu().matrixLU();
+// C = X.inverse();
}
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){