diff options
Diffstat (limited to 'bench')
-rw-r--r-- | bench/BenchTimer.h | 75 | ||||
-rw-r--r-- | bench/BenchUtil.h | 28 | ||||
-rw-r--r-- | bench/README.txt | 55 | ||||
-rw-r--r-- | bench/basicbench.cxxlist | 12 | ||||
-rw-r--r-- | bench/basicbenchmark.cpp | 46 | ||||
-rw-r--r-- | bench/basicbenchmark.h | 59 | ||||
-rwxr-xr-x | bench/bench_multi_compilers.sh | 28 | ||||
-rwxr-xr-x | bench/bench_unrolling | 11 | ||||
-rw-r--r-- | bench/benchmark.cpp | 31 | ||||
-rw-r--r-- | bench/benchmarkX.cpp | 22 | ||||
-rwxr-xr-x | bench/benchmark_suite | 17 | ||||
-rw-r--r-- | bench/ompbench.cxxlist | 7 | ||||
-rw-r--r-- | bench/ompbenchmark.cpp | 81 |
13 files changed, 472 insertions, 0 deletions
diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h new file mode 100644 index 000000000..e86c6ce13 --- /dev/null +++ b/bench/BenchTimer.h @@ -0,0 +1,75 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_BENCH_TIMER_H +#define EIGEN_BENCH_TIMER_H + +#include <sys/time.h> +#include <unistd.h> +#include <cstdlib> + +namespace Eigen +{ + +/** Elapsed time timer keeping the best try. + */ +class BenchTimer +{ +public: + + BenchTimer() : m_best(1e99) {} + + ~BenchTimer() {} + + inline void start(void) {m_start = getTime();} + inline void stop(void) + { + m_best = std::min(m_best, getTime() - m_start); + } + + /** Return the best elapsed time. 
+ */ + inline double value(void) + { + return m_best; + } + + static inline double getTime(void) + { + struct timeval tv; + struct timezone tz; + gettimeofday(&tv, &tz); + return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec; + } + +protected: + + double m_best, m_start; + +}; + +} + +#endif // EIGEN_BENCH_TIMER_H diff --git a/bench/BenchUtil.h b/bench/BenchUtil.h new file mode 100644 index 000000000..bb3c4611c --- /dev/null +++ b/bench/BenchUtil.h @@ -0,0 +1,28 @@ + +#include <Eigen/Core> +#include "BenchTimer.h" + +using namespace std; +USING_PART_OF_NAMESPACE_EIGEN + +#include <boost/preprocessor/repetition/enum_params.hpp> +#include <boost/preprocessor/repetition.hpp> +#include <boost/preprocessor/seq.hpp> +#include <boost/preprocessor/array.hpp> +#include <boost/preprocessor/arithmetic.hpp> +#include <boost/preprocessor/comparison.hpp> +#include <boost/preprocessor/punctuation.hpp> +#include <boost/preprocessor/punctuation/comma.hpp> +#include <boost/preprocessor/stringize.hpp> + +template<typename MatrixType> void initMatrix_random(MatrixType& mat) __attribute__((noinline)); +template<typename MatrixType> void initMatrix_random(MatrixType& mat) +{ + mat.setRandom();// = MatrixType::random(mat.rows(), mat.cols()); +} + +template<typename MatrixType> void initMatrix_identity(MatrixType& mat) __attribute__((noinline)); +template<typename MatrixType> void initMatrix_identity(MatrixType& mat) +{ + mat.setIdentity(); +} diff --git a/bench/README.txt b/bench/README.txt new file mode 100644 index 000000000..39831ae8a --- /dev/null +++ b/bench/README.txt @@ -0,0 +1,55 @@ + +This folder contains a couple of benchmark utilities and Eigen benchmarks. + +**************************** +* bench_multi_compilers.sh * +**************************** + +This script lets you run a benchmark with a set of different compilers/compiler options. 
+It takes two arguments: + - a file defining the list of the compilers with their options + - the .cpp file of the benchmark + +Examples: + +$ ./bench_multi_compilers.sh basicbench.cxxlist basicbenchmark.cpp + + g++-4.1 -O3 -DNDEBUG -finline-limit=10000 + 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / + 0.271102 0.131416 0.422322 0.198633 + 0.201658 0.102436 0.397566 0.207282 + + g++-4.2 -O3 -DNDEBUG -finline-limit=10000 + 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / + 0.107805 0.0890579 0.30265 0.161843 + 0.127157 0.0712581 0.278341 0.191029 + + g++-4.3 -O3 -DNDEBUG -finline-limit=10000 + 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / + 0.134318 0.105291 0.3704 0.180966 + 0.137703 0.0732472 0.31225 0.202204 + + icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size + 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / + 0.226145 0.0941319 0.371873 0.159433 + 0.109302 0.0837538 0.328102 0.173891 + + +$ ./bench_multi_compilers.sh ompbench.cxxlist ompbenchmark.cpp + + g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp + double, fixed-size 4x4: 0.00165105s 0.0778739s + double, 32x32: 0.0654769s 0.075289s => x0.869674 (2) + double, 128x128: 0.054148s 0.0419669s => x1.29025 (2) + double, 512x512: 0.913799s 0.428533s => x2.13239 (2) + double, 1024x1024: 14.5972s 9.3542s => x1.5605 (2) + + icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp + double, fixed-size 4x4: 0.000589848s 0.019949s + double, 32x32: 0.0682781s 0.0449722s => x1.51823 (2) + double, 128x128: 0.0547509s 0.0435519s => x1.25714 (2) + double, 512x512: 0.829436s 0.424438s => x1.9542 (2) + double, 1024x1024: 14.5243s 10.7735s => x1.34815 (2) + + + diff --git a/bench/basicbench.cxxlist b/bench/basicbench.cxxlist new file mode 100644 index 000000000..93266aaf2 --- /dev/null +++ b/bench/basicbench.cxxlist @@ -0,0 +1,12 @@ +#!/bin/bash + +CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG" +CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000" + +CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG" +CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000" + 
+CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG" +CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000" + +CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size"
\ No newline at end of file diff --git a/bench/basicbenchmark.cpp b/bench/basicbenchmark.cpp new file mode 100644 index 000000000..c44ed4514 --- /dev/null +++ b/bench/basicbenchmark.cpp @@ -0,0 +1,46 @@ + +#include "BenchUtil.h" +#include "basicbenchmark.h" + +int main(int argc, char *argv[]) +{ + // set the flush-to-zero (bit 15) and denormals-are-zero (bit 6) flags of MXCSR (32832 == 0x8040); + // this leads to more stable bench results + // (this is done by default by ICC) + #ifndef __INTEL_COMPILER + { + int aux; + asm volatile( + "stmxcsr %[aux] \n\t" + "orl $32832, %[aux] \n\t" + "ldmxcsr %[aux] \n\t" + : [aux] "=m" (aux)); // stmxcsr writes aux, so it is an output operand; volatile keeps the MXCSR update from being dropped + } + #endif + + // this is the list of matrix type and size we want to bench: + // ((suffix) (matrix size) (number of iterations)) + #define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000)) +// #define MODES ((Xd)(20)(10000)) + + #define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \ + << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \ + << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / " + + std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl; + + const int tries = 10; + + #define _RUN_BENCH(R,ARG,EL) \ + std::cout << ARG( \ + BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\ + BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \ + << " "; + + BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES ); + std::cout << endl; + BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES ); + std::cout << endl; + + return 0; +} diff --git a/bench/basicbenchmark.h b/bench/basicbenchmark.h new file mode 100644 index 000000000..60e1c0258 --- /dev/null +++ b/bench/basicbenchmark.h @@ -0,0 +1,59 @@ + +enum {LazyEval, EarlyEval, OmpEval}; + +template<int Mode, typename MatrixType> +void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline)); + +template<int Mode, typename MatrixType> +void benchBasic_loop(const MatrixType& I, MatrixType& m, int 
iterations) +{ + for(int a = 0; a < iterations; a++) + { + if (Mode==LazyEval) + { + asm("#begin_bench_loop LazyEval"); + if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + m = (I + 0.00005 * (m + m.lazyProduct(m))).eval(); + } + else if (Mode==OmpEval) + { + asm("#begin_bench_loop OmpEval"); + if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP(); + } + else + { + asm("#begin_bench_loop EarlyEval"); + if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + m = I + 0.00005 * (m + m * m); + } + asm("#end_bench_loop"); + } +} + +template<int Mode, typename MatrixType> +double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline)); + +template<int Mode, typename MatrixType> +double benchBasic(const MatrixType& mat, int iterations, int tries) +{ + const int rows = mat.rows(); + const int cols = mat.cols(); + + MatrixType I(rows,cols); + MatrixType m(rows,cols); + + initMatrix_identity(I); + + Eigen::BenchTimer timer; + for(uint t=0; t<tries; ++t) + { + initMatrix_random(m); + timer.start(); + benchBasic_loop<Mode>(I, m, iterations); + timer.stop(); + cerr << m; + } + return timer.value(); +}; + diff --git a/bench/bench_multi_compilers.sh b/bench/bench_multi_compilers.sh new file mode 100755 index 000000000..ce5586fb9 --- /dev/null +++ b/bench/bench_multi_compilers.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +if (($# < 2)); then + echo "Usage: $0 compilerlist.txt benchfile.cpp" +else + +compilerlist=$1 +benchfile=$2 + +g=0 +source $compilerlist + +# for each compiler, compile benchfile and run the benchmark +for (( i=0 ; i<g ; ++i )) ; do + # check the compiler exists + compiler=`echo ${CLIST[$i]} | cut -d " " -f 1` + if [ -e `which $compiler` ]; then + echo "${CLIST[$i]}" +# echo "${CLIST[$i]} $benchfile -I.. -o bench~" + if [ -e ./.bench ] ; then rm .bench; fi + ${CLIST[$i]} $benchfile -I.. 
-o .bench && ./.bench 2> /dev/null + echo "" + else + echo "compiler not found: $compiler" + fi +done + +fi diff --git a/bench/bench_unrolling b/bench/bench_unrolling new file mode 100755 index 000000000..4af791412 --- /dev/null +++ b/bench/bench_unrolling @@ -0,0 +1,11 @@ +#!/bin/bash + +# gcc : CXX="g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000" +# icc : CXX="icpc -fast -no-inline-max-size -fno-exceptions" + +for ((i=1; i<16; ++i)); do + echo "Matrix size: $i x $i :" + $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT_OPEQUAL=1024 -DEIGEN_UNROLLING_LIMIT_PRODUCT=25 -o benchmark && time ./benchmark >/dev/null + $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null + echo " " +done diff --git a/bench/benchmark.cpp b/bench/benchmark.cpp new file mode 100644 index 000000000..0d95a5043 --- /dev/null +++ b/bench/benchmark.cpp @@ -0,0 +1,31 @@ +// g++ -O3 -DNDEBUG -DMATSIZE=<x> benchmark.cpp -o benchmark && time ./benchmark +#include <cstdlib> +#include <cmath> +#include <Eigen/Core> + +#ifndef MATSIZE +#define MATSIZE 3 +#endif + +using namespace std; +USING_PART_OF_NAMESPACE_EIGEN + +int main(int argc, char *argv[]) +{ + Matrix<double,MATSIZE,MATSIZE> I; + Matrix<double,MATSIZE,MATSIZE> m; + for(int i = 0; i < MATSIZE; i++) + for(int j = 0; j < MATSIZE; j++) + { + I(i,j) = (i==j); + m(i,j) = (i+MATSIZE*j); + } + asm("#begin"); + for(int a = 0; a < 40000000; a++) + { + m = I + 0.00005 * (m + m*m); + } + asm("#end"); + cout << m << endl; + return 0; +} diff --git a/bench/benchmarkX.cpp b/bench/benchmarkX.cpp new file mode 100644 index 000000000..09173e1ed --- /dev/null +++ b/bench/benchmarkX.cpp @@ -0,0 +1,22 @@ +// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX + +#include <Eigen/Core> + +using namespace std; +USING_PART_OF_NAMESPACE_EIGEN + +int main(int argc, char *argv[]) +{ + MatrixXd I = 
MatrixXd::identity(20,20); + MatrixXd m(20,20); + for(int i = 0; i < 20; i++) for(int j = 0; j < 20; j++) + { + m(i,j) = 0.1 * (i+20*j); + } + for(int a = 0; a < 100000; a++) + { + m = I + 0.00005 * (m + m*m); + } + cout << m << endl; + return 0; +} diff --git a/bench/benchmark_suite b/bench/benchmark_suite new file mode 100755 index 000000000..9ddfccbf6 --- /dev/null +++ b/bench/benchmark_suite @@ -0,0 +1,17 @@ +#!/bin/bash +echo "Fixed size 3x3, ColumnMajor, -DNDEBUG" +$CXX -O3 -I .. -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null +echo "Fixed size 3x3, ColumnMajor, with asserts" +$CXX -O3 -I .. benchmark.cpp -o benchmark && time ./benchmark >/dev/null +echo "Fixed size 3x3, RowMajor, -DNDEBUG" +$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null +echo "Fixed size 3x3, RowMajor, with asserts" +$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR benchmark.cpp -o benchmark && time ./benchmark >/dev/null +echo "Dynamic size 20x20, ColumnMajor, -DNDEBUG" +$CXX -O3 -I .. -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null +echo "Dynamic size 20x20, ColumnMajor, with asserts" +$CXX -O3 -I .. benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null +echo "Dynamic size 20x20, RowMajor, -DNDEBUG" +$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null +echo "Dynamic size 20x20, RowMajor, with asserts" +$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null diff --git a/bench/ompbench.cxxlist b/bench/ompbench.cxxlist new file mode 100644 index 000000000..fc6681d33 --- /dev/null +++ b/bench/ompbench.cxxlist @@ -0,0 +1,7 @@ +#!/bin/bash + +CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp" + +# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=10000 -fopenmp" + +CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp"
\ No newline at end of file diff --git a/bench/ompbenchmark.cpp b/bench/ompbenchmark.cpp new file mode 100644 index 000000000..ac5155cb8 --- /dev/null +++ b/bench/ompbenchmark.cpp @@ -0,0 +1,81 @@ +// g++ -O3 -DNDEBUG -I.. -fopenmp ompbenchmark.cpp -o ompbenchmark && ./ompbenchmark 2> /dev/null +// icpc -fast -fno-exceptions -DNDEBUG -I.. -openmp ompbenchmark.cpp -o ompbenchmark && ./ompbenchmark 2> /dev/null + +#include <omp.h> +#include "BenchUtil.h" +#include "basicbenchmark.h" + +// #include <Eigen/Core> +// #include "BenchTimer.h" +// +// using namespace std; +// USING_PART_OF_NAMESPACE_EIGEN +// +// enum {LazyEval, EarlyEval, OmpEval}; +// +// template<int Mode, typename MatrixType> +// double benchSingleProc(const MatrixType& mat, int iterations, int tries) __attribute__((noinline)); +// +// template<int Mode, typename MatrixType> +// double benchBasic(const MatrixType& mat, int iterations, int tries) +// { +// const int rows = mat.rows(); +// const int cols = mat.cols(); +// +// Eigen::BenchTimer timer; +// for(uint t=0; t<tries; ++t) +// { +// MatrixType I = MatrixType::identity(rows, cols); +// MatrixType m = MatrixType::random(rows, cols); +// +// timer.start(); +// for(int a = 0; a < iterations; a++) +// { +// if(Mode==LazyEval) +// m = (I + 0.00005 * (m + m.lazyProduct(m))).eval(); +// else if(Mode==OmpEval) +// m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP(); +// else +// m = I + 0.00005 * (m + m * m); +// } +// timer.stop(); +// cerr << m; +// } +// return timer.value(); +// }; + +int main(int argc, char *argv[]) +{ + // set the flush-to-zero (bit 15) and denormals-are-zero (bit 6) flags of MXCSR (32832 == 0x8040); + // this leads to more stable bench results + { + int aux; + asm volatile( + "stmxcsr %[aux] \n\t" + "orl $32832, %[aux] \n\t" + "ldmxcsr %[aux] \n\t" + : [aux] "=m" (aux)); // stmxcsr writes aux, so it is an output operand; volatile keeps the MXCSR update from being dropped + } + + // commented since the default setting is to use as many threads as processors + //omp_set_num_threads(omp_get_num_procs()); + + std::cout << "double, fixed-size 4x4: " + << benchBasic<LazyEval>(Matrix4d(), 10000, 10) << "s " + << 
benchBasic<OmpEval>(Matrix4d(), 10000, 10) << "s \n"; + + #define BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES) {\ + double single = benchBasic<LazyEval>(Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \ + double omp = benchBasic<OmpEval> (Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \ + std::cout << #TYPE << ", " << #SIZE << "x" << #SIZE << ": " << single << "s " << omp << "s " \ + << " => x" << single/omp << " (" << omp_get_num_procs() << ")" << std::endl; \ + } + + BENCH_MATRIX(double, 32, 1000, 10); + BENCH_MATRIX(double, 128, 10, 10); + BENCH_MATRIX(double, 512, 1, 6); + BENCH_MATRIX(double, 1024, 1, 4); + + return 0; +} + |