diff options
author | Gael Guennebaud <g.gael@free.fr> | 2008-08-09 20:06:25 +0000 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2008-08-09 20:06:25 +0000 |
commit | b13148c3588c2be8805630c117cdc2b4cfb00576 (patch) | |
tree | fff72a7f4503c1b2dbfaab64db0260661707013d /bench | |
parent | d6e88f81551d5d9c1b66f4fc13d2f2211cb689ff (diff) |
renamed inverseProduct => solveTriangular
Diffstat (limited to 'bench')
-rw-r--r-- | bench/benchCholesky.cpp | 14 | ||||
-rw-r--r-- | bench/benchVecAdd.cpp | 86 | ||||
-rw-r--r-- | bench/btl/libs/blitz/btl_tiny_blitz.cpp | 1 | ||||
-rw-r--r-- | bench/btl/libs/eigen2/eigen2_interface.hh | 3 |
4 files changed, 56 insertions, 48 deletions
diff --git a/bench/benchCholesky.cpp b/bench/benchCholesky.cpp index dd0c7f83b..f64b61b71 100644 --- a/bench/benchCholesky.cpp +++ b/bench/benchCholesky.cpp @@ -18,7 +18,7 @@ using namespace Eigen; #endif #ifndef TRIES -#define TRIES 4 +#define TRIES 10 #endif typedef float Scalar; @@ -29,6 +29,13 @@ __attribute__ ((noinline)) void benchCholesky(const MatrixType& m) int rows = m.rows(); int cols = m.cols(); + int cost = 0; + for (int j=0; j<rows; ++j) + { + int r = std::max(rows - j -1,0); + cost += 2*(r*j+r+j); + } + int repeats = (REPEAT*1000)/(rows*rows); typedef typename MatrixType::Scalar Scalar; @@ -70,7 +77,8 @@ __attribute__ ((noinline)) void benchCholesky(const MatrixType& m) std::cout << "fixed "; std::cout << covMat.rows() << " \t" << (timerNoSqrt.value() * REPEAT) / repeats << "s \t" - << (timerSqrt.value() * REPEAT) / repeats << "s"; + << (timerSqrt.value() * REPEAT) / repeats << "s " + << "(" << 1e-6 * cost*repeats/timerSqrt.value() << " MFLOPS)\n"; #ifdef BENCH_GSL @@ -108,7 +116,7 @@ __attribute__ ((noinline)) void benchCholesky(const MatrixType& m) int main(int argc, char* argv[]) { - const int dynsizes[] = {/*4,6,8,12,16,24,32,49,64,67,128,129,130,131,132,*/256,257,258,259,260,512,0}; + const int dynsizes[] = {/*4,6,8,12,16,24,32,49,64,67,128,129,130,131,132,*/256,257,258,259,260,512,900,0}; std::cout << "size no sqrt standard"; #ifdef BENCH_GSL std::cout << " GSL (standard + double + ATLAS) "; diff --git a/bench/benchVecAdd.cpp b/bench/benchVecAdd.cpp index aa211dce0..ef85b3ced 100644 --- a/bench/benchVecAdd.cpp +++ b/bench/benchVecAdd.cpp @@ -22,7 +22,7 @@ int main(int argc, char* argv[]) int size = SIZE * 8; int size2 = size * size; Scalar* a = ei_aligned_malloc<Scalar>(size2); - Scalar* b = ei_aligned_malloc<Scalar>(size2); + Scalar* b = ei_aligned_malloc<Scalar>(size2+4)+1; Scalar* c = ei_aligned_malloc<Scalar>(size2); for (int i=0; i<size; ++i) @@ -33,22 +33,22 @@ int main(int argc, char* argv[]) BenchTimer timer; timer.reset(); - for (int k=0; k<3; ++k) + for (int k=0; k<10; ++k) { timer.start(); benchVec(a, b, c, size2); timer.stop(); } std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; - + return 0; for (int innersize = size; innersize>2 ; --innersize) { if (size2%innersize==0) { int outersize = size2/innersize; - MatrixXf ma = MatrixXf::map(a, innersize, outersize ); - MatrixXf mb = MatrixXf::map(b, innersize, outersize ); - MatrixXf mc = MatrixXf::map(c, innersize, outersize ); + MatrixXf ma = Map<MatrixXf>(a, innersize, outersize ); + MatrixXf mb = Map<MatrixXf>(b, innersize, outersize ); + MatrixXf mc = Map<MatrixXf>(c, innersize, outersize ); timer.reset(); for (int k=0; k<3; ++k) { @@ -60,9 +60,9 @@ int main(int argc, char* argv[]) } } - VectorXf va = VectorXf::map(a, size2); - VectorXf vb = VectorXf::map(b, size2); - VectorXf vc = VectorXf::map(c, size2); + VectorXf va = Map<VectorXf>(a, size2); + VectorXf vb = Map<VectorXf>(b, size2); + VectorXf vc = Map<VectorXf>(c, size2); timer.reset(); for (int k=0; k<3; ++k) { @@ -95,40 +95,40 @@ void benchVec(Scalar* a, Scalar* b, Scalar* c, int size) for (int k=0; k<REPEAT; ++k) for (int i=0; i<size; i+=PacketSize*8) { - a0 = ei_pload(&a[i]); - b0 = ei_pload(&b[i]); - a1 = ei_pload(&a[i+1*PacketSize]); - b1 = ei_pload(&b[i+1*PacketSize]); - a2 = ei_pload(&a[i+2*PacketSize]); - b2 = ei_pload(&b[i+2*PacketSize]); - a3 = ei_pload(&a[i+3*PacketSize]); - b3 = ei_pload(&b[i+3*PacketSize]); - ei_pstore(&a[i], ei_padd(a0, b0)); - a0 = ei_pload(&a[i+4*PacketSize]); - b0 = ei_pload(&b[i+4*PacketSize]); - - ei_pstore(&a[i+1*PacketSize], ei_padd(a1, b1)); - a1 = ei_pload(&a[i+5*PacketSize]); - b1 = ei_pload(&b[i+5*PacketSize]); - - ei_pstore(&a[i+2*PacketSize], ei_padd(a2, b2)); - a2 = ei_pload(&a[i+6*PacketSize]); - b2 = ei_pload(&b[i+6*PacketSize]); - - ei_pstore(&a[i+3*PacketSize], ei_padd(a3, b3)); - a3 = ei_pload(&a[i+7*PacketSize]); - b3 = ei_pload(&b[i+7*PacketSize]); - - ei_pstore(&a[i+4*PacketSize], ei_padd(a0, b0)); - ei_pstore(&a[i+5*PacketSize], ei_padd(a1, b1)); - ei_pstore(&a[i+6*PacketSize], ei_padd(a2, b2)); - ei_pstore(&a[i+7*PacketSize], ei_padd(a3, b3)); +// a0 = ei_pload(&a[i]); +// b0 = ei_pload(&b[i]); +// a1 = ei_pload(&a[i+1*PacketSize]); +// b1 = ei_pload(&b[i+1*PacketSize]); +// a2 = ei_pload(&a[i+2*PacketSize]); +// b2 = ei_pload(&b[i+2*PacketSize]); +// a3 = ei_pload(&a[i+3*PacketSize]); +// b3 = ei_pload(&b[i+3*PacketSize]); +// ei_pstore(&a[i], ei_padd(a0, b0)); +// a0 = ei_pload(&a[i+4*PacketSize]); +// b0 = ei_pload(&b[i+4*PacketSize]); +// +// ei_pstore(&a[i+1*PacketSize], ei_padd(a1, b1)); +// a1 = ei_pload(&a[i+5*PacketSize]); +// b1 = ei_pload(&b[i+5*PacketSize]); +// +// ei_pstore(&a[i+2*PacketSize], ei_padd(a2, b2)); +// a2 = ei_pload(&a[i+6*PacketSize]); +// b2 = ei_pload(&b[i+6*PacketSize]); +// +// ei_pstore(&a[i+3*PacketSize], ei_padd(a3, b3)); +// a3 = ei_pload(&a[i+7*PacketSize]); +// b3 = ei_pload(&b[i+7*PacketSize]); +// +// ei_pstore(&a[i+4*PacketSize], ei_padd(a0, b0)); +// ei_pstore(&a[i+5*PacketSize], ei_padd(a1, b1)); +// ei_pstore(&a[i+6*PacketSize], ei_padd(a2, b2)); +// ei_pstore(&a[i+7*PacketSize], ei_padd(a3, b3)); -// ei_pstore(&a[i+2*PacketSize], ei_padd(ei_pload(&a[i+2*PacketSize]), ei_pload(&b[i+2*PacketSize]))); -// ei_pstore(&a[i+3*PacketSize], ei_padd(ei_pload(&a[i+3*PacketSize]), ei_pload(&b[i+3*PacketSize]))); -// ei_pstore(&a[i+4*PacketSize], ei_padd(ei_pload(&a[i+4*PacketSize]), ei_pload(&b[i+4*PacketSize]))); -// ei_pstore(&a[i+5*PacketSize], ei_padd(ei_pload(&a[i+5*PacketSize]), ei_pload(&b[i+5*PacketSize]))); -// ei_pstore(&a[i+6*PacketSize], ei_padd(ei_pload(&a[i+6*PacketSize]), ei_pload(&b[i+6*PacketSize]))); -// ei_pstore(&a[i+7*PacketSize], ei_padd(ei_pload(&a[i+7*PacketSize]), ei_pload(&b[i+7*PacketSize]))); + ei_pstore(&a[i+2*PacketSize], ei_padd(ei_ploadu(&a[i+2*PacketSize]), ei_ploadu(&b[i+2*PacketSize]))); + ei_pstore(&a[i+3*PacketSize], ei_padd(ei_ploadu(&a[i+3*PacketSize]), ei_ploadu(&b[i+3*PacketSize]))); + ei_pstore(&a[i+4*PacketSize], ei_padd(ei_ploadu(&a[i+4*PacketSize]), ei_ploadu(&b[i+4*PacketSize]))); + ei_pstore(&a[i+5*PacketSize], ei_padd(ei_ploadu(&a[i+5*PacketSize]), ei_ploadu(&b[i+5*PacketSize]))); + ei_pstore(&a[i+6*PacketSize], ei_padd(ei_ploadu(&a[i+6*PacketSize]), ei_ploadu(&b[i+6*PacketSize]))); + ei_pstore(&a[i+7*PacketSize], ei_padd(ei_ploadu(&a[i+7*PacketSize]), ei_ploadu(&b[i+7*PacketSize]))); } } diff --git a/bench/btl/libs/blitz/btl_tiny_blitz.cpp b/bench/btl/libs/blitz/btl_tiny_blitz.cpp index eff3c1881..9fddde752 100644 --- a/bench/btl/libs/blitz/btl_tiny_blitz.cpp +++ b/bench/btl/libs/blitz/btl_tiny_blitz.cpp @@ -23,7 +23,6 @@ #include "action_matrix_vector_product.hh" #include "action_matrix_matrix_product.hh" #include "action_axpy.hh" -#include "timers/x86_perf_analyzer.hh" BTL_MAIN; diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh index 64314ab3b..f3ba5e8af 100644 --- a/bench/btl/libs/eigen2/eigen2_interface.hh +++ b/bench/btl/libs/eigen2/eigen2_interface.hh @@ -134,7 +134,7 @@ public : } static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int N){ - X = L.template marked<Lower>().inverseProduct(B); + X = L.template marked<Lower>().solveTriangular(B); } static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){ @@ -146,6 +146,7 @@ public : static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ C = X.lu().matrixLU(); +// C = X.inverse(); } static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ |