From 1ff1d4a1241c40ac0cf3e5bc4e9827b2578a66b5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 5 Dec 2016 23:01:52 +0100 Subject: Add performance monitoring for LLT --- bench/perf_monitoring/gemm/changesets.txt | 5 +- bench/perf_monitoring/gemm/gemm.cpp | 62 +------------------- bench/perf_monitoring/gemm/gemm_common.h | 66 ++++++++++++++++++++++ .../perf_monitoring/gemm/gemm_square_settings.txt | 11 ++++ bench/perf_monitoring/gemm/gemv_common.h | 4 +- .../perf_monitoring/gemm/gemv_square_settings.txt | 13 +++++ bench/perf_monitoring/gemm/llt.cpp | 15 +++++ bench/perf_monitoring/gemm/runall.sh | 1 + bench/perf_monitoring/gemm/trmv_lot.cpp | 2 +- 9 files changed, 113 insertions(+), 66 deletions(-) create mode 100644 bench/perf_monitoring/gemm/gemm_common.h create mode 100644 bench/perf_monitoring/gemm/gemm_square_settings.txt create mode 100644 bench/perf_monitoring/gemm/gemv_square_settings.txt create mode 100644 bench/perf_monitoring/gemm/llt.cpp (limited to 'bench') diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index 9a2336390..8d9f15cdb 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -31,7 +31,6 @@ before-evaluators 6845:7333ed40c6ef # change prefetching in gebp #6856:b5be5e10eb7f # merge index conversion 6893:c3a64aba7c70 # clean blocking size computation -6898:6fb31ebe6492 # rotating kernel for ARM 6899:877facace746 # rotating kernel for ARM only #6904:c250623ae9fa # result_of 6921:915f1b1fc158 # fix prefetching change for ARM @@ -50,7 +49,7 @@ before-evaluators 7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code 7591:09a8e2186610 # 3.3-alpha1 7650:b0f3c8f43025 # help clang inlining -8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) +#8744:74b789ada92a # Improved the 
matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) 8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes 8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path 8985:d935df21a082 # Remove the rotating kernel. @@ -65,5 +64,5 @@ before-evaluators 9942:b1d3eba60130 # Operators += and -= do not resize! 9943:79bb9887afd4 # Ease compiler job to generate clean and efficient code in mat*vec 9946:2213991340ea # Complete rewrite of column-major-matrix * vector product to deliver higher performance of modern CPU. -9953:21acc0e8d782 # Improve performance of row-major-dense-matrix * vector products for recent CPUs. +9955:630471c3298c # Improve performance of row-major-dense-matrix * vector products for recent CPUs. (this is the next changeset fixing a typo) diff --git a/bench/perf_monitoring/gemm/gemm.cpp b/bench/perf_monitoring/gemm/gemm.cpp index 3ef37d21b..804139db7 100644 --- a/bench/perf_monitoring/gemm/gemm.cpp +++ b/bench/perf_monitoring/gemm/gemm.cpp @@ -1,17 +1,4 @@ -#include -#include -#include -#include -#include "../../BenchTimer.h" -using namespace Eigen; - -#ifndef SCALAR -#error SCALAR must be defined -#endif - -typedef SCALAR Scalar; - -typedef Matrix Mat; +#include "gemm_common.h" EIGEN_DONT_INLINE void gemm(const Mat &A, const Mat &B, Mat &C) @@ -19,52 +6,7 @@ void gemm(const Mat &A, const Mat &B, Mat &C) C.noalias() += A * B; } -EIGEN_DONT_INLINE -double bench(long m, long n, long k) -{ - Mat A(m,k); - Mat B(k,n); - Mat C(m,n); - A.setRandom(); - B.setRandom(); - C.setZero(); - - BenchTimer t; - - double up = 1e8*4/sizeof(Scalar); - double tm0 = 4, tm1 = 10; - if(NumTraits::IsComplex) - { - up /= 4; - tm0 = 2; - tm1 = 4; - } - - double flops = 2. 
* m * n * k; - long rep = std::max(1., std::min(100., up/flops) ); - long tries = std::max(tm0, std::min(tm1, up/flops) ); - - BENCH(t, tries, rep, gemm(A,B,C)); - - return 1e-9 * rep * flops / t.best(); -} - int main(int argc, char **argv) { - std::vector results; - - std::string filename = std::string("gemm_settings.txt"); - if(argc>1) - filename = std::string(argv[1]); - std::ifstream settings(filename); - long m, n, k; - while(settings >> m >> n >> k) - { - //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; - results.push_back( bench(m, n, k) ); - } - - std::cout << RowVectorXd::Map(results.data(), results.size()); - - return 0; + return main_gemm(argc, argv, gemm); } diff --git a/bench/perf_monitoring/gemm/gemm_common.h b/bench/perf_monitoring/gemm/gemm_common.h new file mode 100644 index 000000000..34ff5edb8 --- /dev/null +++ b/bench/perf_monitoring/gemm/gemm_common.h @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +typedef Matrix Mat; + +template +EIGEN_DONT_INLINE +double bench(long m, long n, long k, const Func& f) +{ + Mat A(m,k); + Mat B(k,n); + Mat C(m,n); + A.setRandom(); + B.setRandom(); + C.setZero(); + + BenchTimer t; + + double up = 1e8*4/sizeof(Scalar); + double tm0 = 4, tm1 = 10; + if(NumTraits::IsComplex) + { + up /= 4; + tm0 = 2; + tm1 = 4; + } + + double flops = 2. 
* m * n * k; + long rep = std::max(1., std::min(100., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, f(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +template +int main_gemm(int argc, char **argv, const Func& f) +{ + std::vector results; + + std::string filename = std::string("gemm_settings.txt"); + if(argc>1) + filename = std::string(argv[1]); + std::ifstream settings(filename); + long m, n, k; + while(settings >> m >> n >> k) + { + //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; + results.push_back( bench(m, n, k, f) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/bench/perf_monitoring/gemm/gemm_square_settings.txt b/bench/perf_monitoring/gemm/gemm_square_settings.txt new file mode 100644 index 000000000..98474d173 --- /dev/null +++ b/bench/perf_monitoring/gemm/gemm_square_settings.txt @@ -0,0 +1,11 @@ +8 8 8 +9 9 9 +12 12 12 +15 15 15 +16 16 16 +24 24 24 +102 102 102 +239 239 239 +240 240 240 +2400 2400 2400 +2463 2463 2463 diff --git a/bench/perf_monitoring/gemm/gemv_common.h b/bench/perf_monitoring/gemm/gemv_common.h index 65ee6cbd1..55ee0ff8b 100644 --- a/bench/perf_monitoring/gemm/gemv_common.h +++ b/bench/perf_monitoring/gemm/gemv_common.h @@ -47,14 +47,14 @@ double bench(long m, long n, Func &f) } template -int main_gemv(int argc, char **argv, Func& f, const std::string &setting_filename) +int main_gemv(int argc, char **argv, Func& f) { std::vector results; std::string filename = std::string("gemv_settings.txt"); if(argc>1) filename = std::string(argv[1]); - std::ifstream settings(setting_filename); + std::ifstream settings(filename); long m, n; while(settings >> m >> n) { diff --git a/bench/perf_monitoring/gemm/gemv_square_settings.txt b/bench/perf_monitoring/gemm/gemv_square_settings.txt new file mode 100644 index 000000000..5165759f4 --- /dev/null +++ b/bench/perf_monitoring/gemm/gemv_square_settings.txt @@ -0,0 +1,13 @@ 
+8 8 +9 9 +12 12 +15 15 +16 16 +24 24 +53 53 +74 74 +102 102 +239 239 +240 240 +2400 2400 +2463 2463 diff --git a/bench/perf_monitoring/gemm/llt.cpp b/bench/perf_monitoring/gemm/llt.cpp new file mode 100644 index 000000000..d55b7d803 --- /dev/null +++ b/bench/perf_monitoring/gemm/llt.cpp @@ -0,0 +1,15 @@ +#include "gemm_common.h" +#include <Eigen/Cholesky> + +EIGEN_DONT_INLINE +void llt(const Mat &A, const Mat &B, Mat &C) +{ + C = A; + C.diagonal().array() += 1000; + Eigen::internal::llt_inplace<Mat::Scalar, Lower>::blocked(C); +} + +int main(int argc, char **argv) +{ + return main_gemm(argc, argv, llt); +} diff --git a/bench/perf_monitoring/gemm/runall.sh b/bench/perf_monitoring/gemm/runall.sh index 2dcf655ef..4e4b4bcff 100755 --- a/bench/perf_monitoring/gemm/runall.sh +++ b/bench/perf_monitoring/gemm/runall.sh @@ -19,4 +19,5 @@ ./run.sh trmv_lo gemv_square_settings.txt $* ./run.sh trmv_upt gemv_square_settings.txt $* ./run.sh trmv_lot gemv_square_settings.txt $* +./run.sh llt gemm_square_settings.txt $* diff --git a/bench/perf_monitoring/gemm/trmv_lot.cpp b/bench/perf_monitoring/gemm/trmv_lot.cpp index d0c15ef68..32e085aaf 100644 --- a/bench/perf_monitoring/gemm/trmv_lot.cpp +++ b/bench/perf_monitoring/gemm/trmv_lot.cpp @@ -1,7 +1,7 @@ #include "gemv_common.h" EIGEN_DONT_INLINE -void gemv(const Mat &A, Vec &B, const Vec &C) +void trmv(const Mat &A, Vec &B, const Vec &C) { B.noalias() += A.transpose().triangularView<Lower>() * C; } -- cgit v1.2.3