From 445c015751ecf1c9a22c9a32a1ceb01e07bb2064 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 5 Dec 2016 13:36:26 +0100 Subject: extend monitoring benchmarks with transpose matrix-vector and triangular matrix-vectors. --- bench/perf_monitoring/gemm/changesets.txt | 21 ++++++---- bench/perf_monitoring/gemm/gemm.cpp | 5 ++- bench/perf_monitoring/gemm/gemv.cpp | 60 +-------------------------- bench/perf_monitoring/gemm/gemv_common.h | 68 +++++++++++++++++++++++++++++++ bench/perf_monitoring/gemm/gemvt.cpp | 12 ++++++ bench/perf_monitoring/gemm/lazy_gemm.cpp | 5 ++- bench/perf_monitoring/gemm/make_plot.sh | 7 ++-- bench/perf_monitoring/gemm/run.sh | 19 +++++---- bench/perf_monitoring/gemm/runall.sh | 22 ++++++++++ bench/perf_monitoring/gemm/trmv_lo.cpp | 12 ++++++ bench/perf_monitoring/gemm/trmv_lot.cpp | 12 ++++++ bench/perf_monitoring/gemm/trmv_up.cpp | 12 ++++++ bench/perf_monitoring/gemm/trmv_upt.cpp | 12 ++++++ 13 files changed, 189 insertions(+), 78 deletions(-) create mode 100644 bench/perf_monitoring/gemm/gemv_common.h create mode 100644 bench/perf_monitoring/gemm/gemvt.cpp create mode 100755 bench/perf_monitoring/gemm/runall.sh create mode 100644 bench/perf_monitoring/gemm/trmv_lo.cpp create mode 100644 bench/perf_monitoring/gemm/trmv_lot.cpp create mode 100644 bench/perf_monitoring/gemm/trmv_up.cpp create mode 100644 bench/perf_monitoring/gemm/trmv_upt.cpp (limited to 'bench') diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index 4bff3cc4a..9a2336390 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -25,13 +25,13 @@ before-evaluators #6742:0cbd6195e829 # merge default to tensors #6747:853d2bafeb8f # Generalized the gebp apis 6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation -#6781:9cc5a931b2c6 # generalized gemv 
-#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product +6781:9cc5a931b2c6 # generalized gemv +6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product #6844:039efd86b75c # merge tensor 6845:7333ed40c6ef # change prefetching in gebp #6856:b5be5e10eb7f # merge index conversion -#6893:c3a64aba7c70 # clean blocking size computation -#6898:6fb31ebe6492 # rotating kernel for ARM +6893:c3a64aba7c70 # clean blocking size computation +6898:6fb31ebe6492 # rotating kernel for ARM 6899:877facace746 # rotating kernel for ARM only #6904:c250623ae9fa # result_of 6921:915f1b1fc158 # fix prefetching change for ARM @@ -50,7 +50,7 @@ before-evaluators 7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code 7591:09a8e2186610 # 3.3-alpha1 7650:b0f3c8f43025 # help clang inlining -#8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) +8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) 8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes 8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path 8985:d935df21a082 # Remove the rotating kernel. 
@@ -59,6 +59,11 @@ before-evaluators 9174:d228bc282ac9 # merge 9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955 9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775 -3.3-beta2 -3.3-rc1 -3.3.0 +9361:69d418c06999 # 3.3-beta2 +9583:bef509908b9d # 3.3-rc1 +9792:26667be4f70b # 3.3.0 +9942:b1d3eba60130 # Operators += and -= do not resize! +9943:79bb9887afd4 # Ease compiler job to generate clean and efficient code in mat*vec +9946:2213991340ea # Complete rewrite of column-major-matrix * vector product to deliver higher performance of modern CPU. +9953:21acc0e8d782 # Improve performance of row-major-dense-matrix * vector products for recent CPUs. + diff --git a/bench/perf_monitoring/gemm/gemm.cpp b/bench/perf_monitoring/gemm/gemm.cpp index 614bd4737..3ef37d21b 100644 --- a/bench/perf_monitoring/gemm/gemm.cpp +++ b/bench/perf_monitoring/gemm/gemm.cpp @@ -53,7 +53,10 @@ int main(int argc, char **argv) { std::vector results; - std::ifstream settings("gemm_settings.txt"); + std::string filename = std::string("gemm_settings.txt"); + if(argc>1) + filename = std::string(argv[1]); + std::ifstream settings(filename); long m, n, k; while(settings >> m >> n >> k) { diff --git a/bench/perf_monitoring/gemm/gemv.cpp b/bench/perf_monitoring/gemm/gemv.cpp index b7441a357..82e5ab960 100644 --- a/bench/perf_monitoring/gemm/gemv.cpp +++ b/bench/perf_monitoring/gemm/gemv.cpp @@ -1,18 +1,4 @@ -#include -#include -#include -#include -#include "../../BenchTimer.h" -using namespace Eigen; - -#ifndef SCALAR -#error SCALAR must be defined -#endif - -typedef SCALAR Scalar; - -typedef Matrix Mat; -typedef Matrix Vec; +#include "gemv_common.h" EIGEN_DONT_INLINE void gemv(const Mat &A, const Vec &B, Vec &C) @@ -20,49 +6,7 @@ void gemv(const Mat &A, const Vec &B, Vec &C) C.noalias() += A * B; } -EIGEN_DONT_INLINE -double bench(long m, long n) -{ - Mat A(m,n); - Vec B(n); - Vec C(m); - A.setRandom(); - B.setRandom(); - 
C.setZero(); - - BenchTimer t; - - double up = 1e9*4/sizeof(Scalar); - double tm0 = 4, tm1 = 10; - if(NumTraits::IsComplex) - { - up /= 4; - tm0 = 2; - tm1 = 4; - } - - double flops = 2. * m * n; - long rep = std::max(1., std::min(100., up/flops) ); - long tries = std::max(tm0, std::min(tm1, up/flops) ); - - BENCH(t, tries, rep, gemv(A,B,C)); - - return 1e-9 * rep * flops / t.best(); -} - int main(int argc, char **argv) { - std::vector results; - - std::ifstream settings("gemv_settings.txt"); - long m, n; - while(settings >> m >> n) - { - //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; - results.push_back( bench(m, n) ); - } - - std::cout << RowVectorXd::Map(results.data(), results.size()); - - return 0; + return main_gemv(argc, argv, gemv); } diff --git a/bench/perf_monitoring/gemm/gemv_common.h b/bench/perf_monitoring/gemm/gemv_common.h new file mode 100644 index 000000000..65ee6cbd1 --- /dev/null +++ b/bench/perf_monitoring/gemm/gemv_common.h @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +typedef Matrix Mat; +typedef Matrix Vec; + +template +EIGEN_DONT_INLINE +double bench(long m, long n, Func &f) +{ + Mat A(m,n); + Vec B(n); + Vec C(m); + A.setRandom(); + B.setRandom(); + C.setRandom(); + + BenchTimer t; + + double up = 1e8/sizeof(Scalar); + double tm0 = 4, tm1 = 10; + if(NumTraits::IsComplex) + { + up /= 4; + tm0 = 2; + tm1 = 4; + } + + double flops = 2. 
* m * n; + long rep = std::max(1., std::min(100., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, f(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +template +int main_gemv(int argc, char **argv, Func& f, const std::string &setting_filename = "gemv_settings.txt") +{ + std::vector results; + + std::string filename = setting_filename; /* default settings file; argv[1] overrides it */ + if(argc>1) + filename = std::string(argv[1]); + std::ifstream settings(filename); + long m, n; + while(settings >> m >> n) + { + //std::cerr << " Testing " << m << " " << n << std::endl; + results.push_back( bench(m, n, f) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/bench/perf_monitoring/gemm/gemvt.cpp b/bench/perf_monitoring/gemm/gemvt.cpp new file mode 100644 index 000000000..fe945767e --- /dev/null +++ b/bench/perf_monitoring/gemm/gemvt.cpp @@ -0,0 +1,12 @@ +#include "gemv_common.h" + +EIGEN_DONT_INLINE +void gemv(const Mat &A, Vec &B, const Vec &C) +{ + B.noalias() += A.transpose() * C; +} + +int main(int argc, char **argv) +{ + return main_gemv(argc, argv, gemv); +} diff --git a/bench/perf_monitoring/gemm/lazy_gemm.cpp b/bench/perf_monitoring/gemm/lazy_gemm.cpp index 6dc370155..773306048 100644 --- a/bench/perf_monitoring/gemm/lazy_gemm.cpp +++ b/bench/perf_monitoring/gemm/lazy_gemm.cpp @@ -84,7 +84,10 @@ int main(int argc, char **argv) { std::vector results; - std::ifstream settings("lazy_gemm_settings.txt"); + std::string filename = std::string("lazy_gemm_settings.txt"); + if(argc>1) + filename = std::string(argv[1]); + std::ifstream settings(filename); long m, n, k, t; while(settings >> m >> n >> k >> t) { diff --git a/bench/perf_monitoring/gemm/make_plot.sh b/bench/perf_monitoring/gemm/make_plot.sh index cd3214ac9..11f3c53be 100755 --- a/bench/perf_monitoring/gemm/make_plot.sh +++ b/bench/perf_monitoring/gemm/make_plot.sh @@ -5,6 +5,7 @@ # and generates $1.pdf WHAT=$1 bench=$2 +settings_file=$3 header="rev "
while read line @@ -12,7 +13,7 @@ do if [ ! -z '$line' ]; then header="$header \"$line\"" fi -done < $bench"_settings.txt" +done < $settings_file echo $header > $WHAT.out.header cat $WHAT.out >> $WHAT.out.header @@ -25,7 +26,7 @@ echo "set xtics rotate 1" >> $WHAT.gnuplot echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot -col=`cat $bench"_settings.txt" | wc -l` +col=`cat $settings_file | wc -l` echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot echo " " >> $WHAT.gnuplot @@ -35,4 +36,4 @@ gnuplot -persist < $WHAT.gnuplot # convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 $WHAT.ps -background white -flatten .$WHAT.png # clean -rm $WHAT.out.header $WHAT.gnuplot \ No newline at end of file +rm $WHAT.out.header $WHAT.gnuplot diff --git a/bench/perf_monitoring/gemm/run.sh b/bench/perf_monitoring/gemm/run.sh index 9d6ee40bc..44a3457e0 100755 --- a/bench/perf_monitoring/gemm/run.sh +++ b/bench/perf_monitoring/gemm/run.sh @@ -1,17 +1,22 @@ #!/bin/bash -# ./run.sh gemm -# ./run.sh lazy_gemm +# ./run.sh gemm gemm_settings.txt +# ./run.sh lazy_gemm lazy_gemm_settings.txt +# ./run.sh gemv gemv_settings.txt +# ./run.sh trmv_up gemv_square_settings.txt +# ... 
# Examples of environment variables to be set: # PREFIX="haswell-fma-" # CXX_FLAGS="-mfma" +# CXX=clang++ # Options: # -up : enforce the recomputation of existing data, and keep best results as a merging strategy # -s : recompute selected changesets only and keep bests bench=$1 +settings_file=$2 if echo "$*" | grep '\-up' > /dev/null; then update=true @@ -88,7 +93,7 @@ function test_current fi res=$prev count_rev=`echo $prev | wc -w` - count_ref=`cat $bench"_settings.txt" | wc -l` + count_ref=`cat $settings_file | wc -l` if echo "$global_args" | grep "$rev" > /dev/null; then rev_found=true else @@ -98,7 +103,7 @@ function test_current # echo $count_rev et $count_ref if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then - curr=`./$name` + curr=`./$name $settings_file` if [ $count_rev == $count_ref ]; then echo "merge previous $prev" echo "with new $curr" @@ -149,8 +154,8 @@ echo "Complex:" cat $PREFIX"c""$bench.out" echo "" -./make_plot.sh $PREFIX"s"$bench $bench -./make_plot.sh $PREFIX"d"$bench $bench -./make_plot.sh $PREFIX"c"$bench $bench +./make_plot.sh $PREFIX"s"$bench $bench $settings_file +./make_plot.sh $PREFIX"d"$bench $bench $settings_file +./make_plot.sh $PREFIX"c"$bench $bench $settings_file diff --git a/bench/perf_monitoring/gemm/runall.sh b/bench/perf_monitoring/gemm/runall.sh new file mode 100755 index 000000000..2dcf655ef --- /dev/null +++ b/bench/perf_monitoring/gemm/runall.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# ./runall.sh + +# Examples of environment variables to be set: +# PREFIX="haswell-fma-" +# CXX_FLAGS="-mfma" +# CXX=clang++ + +# Options: +# -up : enforce the recomputation of existing data, and keep best results as a merging strategy +# -s : recompute selected changesets only and keep bests + +./run.sh gemm gemm_settings.txt $* +./run.sh lazy_gemm lazy_gemm_settings.txt $* +./run.sh 
gemv gemv_settings.txt $* +./run.sh gemvt gemv_settings.txt $* +./run.sh trmv_up gemv_square_settings.txt $* +./run.sh trmv_lo gemv_square_settings.txt $* +./run.sh trmv_upt gemv_square_settings.txt $* +./run.sh trmv_lot gemv_square_settings.txt $* + diff --git a/bench/perf_monitoring/gemm/trmv_lo.cpp b/bench/perf_monitoring/gemm/trmv_lo.cpp new file mode 100644 index 000000000..3fabb6e54 --- /dev/null +++ b/bench/perf_monitoring/gemm/trmv_lo.cpp @@ -0,0 +1,12 @@ +#include "gemv_common.h" + +EIGEN_DONT_INLINE +void trmv(const Mat &A, const Vec &B, Vec &C) +{ + C.noalias() += A.triangularView() * B; +} + +int main(int argc, char **argv) +{ + return main_gemv(argc, argv, trmv); +} diff --git a/bench/perf_monitoring/gemm/trmv_lot.cpp b/bench/perf_monitoring/gemm/trmv_lot.cpp new file mode 100644 index 000000000..d0c15ef68 --- /dev/null +++ b/bench/perf_monitoring/gemm/trmv_lot.cpp @@ -0,0 +1,12 @@ +#include "gemv_common.h" + +EIGEN_DONT_INLINE +void trmv(const Mat &A, Vec &B, const Vec &C) +{ + B.noalias() += A.transpose().triangularView() * C; +} + +int main(int argc, char **argv) +{ + return main_gemv(argc, argv, trmv); +} diff --git a/bench/perf_monitoring/gemm/trmv_up.cpp b/bench/perf_monitoring/gemm/trmv_up.cpp new file mode 100644 index 000000000..c58e471ec --- /dev/null +++ b/bench/perf_monitoring/gemm/trmv_up.cpp @@ -0,0 +1,12 @@ +#include "gemv_common.h" + +EIGEN_DONT_INLINE +void trmv(const Mat &A, const Vec &B, Vec &C) +{ + C.noalias() += A.triangularView() * B; +} + +int main(int argc, char **argv) +{ + return main_gemv(argc, argv, trmv); +} diff --git a/bench/perf_monitoring/gemm/trmv_upt.cpp b/bench/perf_monitoring/gemm/trmv_upt.cpp new file mode 100644 index 000000000..511e00885 --- /dev/null +++ b/bench/perf_monitoring/gemm/trmv_upt.cpp @@ -0,0 +1,12 @@ +#include "gemv_common.h" + +EIGEN_DONT_INLINE +void trmv(const Mat &A, Vec &B, const Vec &C) +{ + B.noalias() += A.transpose().triangularView() * C; +} + +int main(int argc, char **argv) +{ + return
main_gemv(argc, argv, trmv); +} -- cgit v1.2.3