From 25ba9f377c97968923cd654d419fa8ce260f114d Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Thu, 26 Jun 2008 16:06:41 +0000 Subject: * add bench/benchVecAdd.cpp by Gael, fix crash (ei_pload on non-aligned) * introduce packet(int), make use of it in linear vectorized paths --> completely fixes the slowdown noticed in benchVecAdd. * generalize coeff(int) to linear-access xprs * clarify the access flag bits * rework api dox in Coeffs.h and util/Constants.h * improve certain expressions's flags, allowing more vectorization * fix bug in Block: start(int) and end(int) returned dyn*dyn size * fix bug in Block: just because the Eval type has packet access doesn't imply the block xpr should have it too. --- bench/benchVecAdd.cpp | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 bench/benchVecAdd.cpp (limited to 'bench/benchVecAdd.cpp') diff --git a/bench/benchVecAdd.cpp b/bench/benchVecAdd.cpp new file mode 100644 index 000000000..aa211dce0 --- /dev/null +++ b/bench/benchVecAdd.cpp @@ -0,0 +1,134 @@ + +#include +#include +using namespace Eigen; + +#ifndef SIZE +#define SIZE 50 +#endif + +#ifndef REPEAT +#define REPEAT 10000 +#endif + +typedef float Scalar; + +__attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size); +__attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c); +__attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c); + +int main(int argc, char* argv[]) +{ + int size = SIZE * 8; + int size2 = size * size; + Scalar* a = ei_aligned_malloc(size2); + Scalar* b = ei_aligned_malloc(size2); + Scalar* c = ei_aligned_malloc(size2); + + for (int i=0; i2 ; --innersize) + { + if (size2%innersize==0) + { + int outersize = size2/innersize; + MatrixXf ma = MatrixXf::map(a, innersize, outersize ); + MatrixXf mb = MatrixXf::map(b, innersize, outersize ); + MatrixXf mc = MatrixXf::map(c, innersize, outersize ); + timer.reset(); + for (int k=0; k<3; ++k) + { + timer.start(); + benchVec(ma, mb, mc); + timer.stop(); + } + std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; + } + } + + VectorXf va = VectorXf::map(a, size2); + VectorXf vb = VectorXf::map(b, size2); + VectorXf vc = VectorXf::map(c, size2); + timer.reset(); + for (int k=0; k<3; ++k) + { + timer.start(); + benchVec(va, vb, vc); + timer.stop(); + } + std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; + + return 0; +} + +void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) +{ + for (int k=0; k::type PacketScalar; + const int PacketSize = ei_packet_traits::size; + PacketScalar a0, a1, a2, a3, b0, b1, b2, b3; + for (int k=0; k