From eedd5063fd5c28bf56a9ead96ea2c0df336e0158 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 6 Mar 2015 11:47:13 +0100 Subject: Update gemm performance monitoring tool: - permit to recompute a subset of changesets - update changeset list - add a few more cases --- bench/perf_monitoring/gemm/changesets.txt | 67 ++++++++++++++++--------------- bench/perf_monitoring/gemm/run_gemm.sh | 47 ++++++++++++++++++---- bench/perf_monitoring/gemm/settings.txt | 4 ++ 3 files changed, 78 insertions(+), 40 deletions(-) (limited to 'bench/perf_monitoring') diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index f19b4287d..b379d7bd2 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -1,39 +1,42 @@ -3.0.1 -3.1.1 -3.2.0 +#3.0.1 +#3.1.1 +#3.2.0 3.2.4 -5745:37f59e65eb6c -5891:d8652709345d -5893:24b4dc92c6d3 -5895:997c2ef9fc8b -5904:e1eafd14eaa1 -5908:f8ee3c721251 -5921:ca808bb456b0 -5927:8b1001f9e3ac -5937:5a4ca1ad8c53 -5949:f3488f4e45b2 -5969:e09031dccfd9 -5992:4a429f5e0483 +#5745:37f59e65eb6c +5891:d8652709345d # introduce AVX +#5893:24b4dc92c6d3 # merge +5895:997c2ef9fc8b # introduce FMA +#5904:e1eafd14eaa1 # complex and AVX +5908:f8ee3c721251 # improve packing with ptranspose +#5921:ca808bb456b0 # merge +#5927:8b1001f9e3ac +5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks +#5949:f3488f4e45b2 # merge +#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec +#5992:4a429f5e0483 # merge before-evaluators -6334:f6a45e5b8b7c -6639:c9121c60b5c7 -6655:06f163b5221f -6677:700e023044e7 # FMA has been wrongly disabled -6681:11d31dafb0e3 -6699:5e6e8e10aad1 # merge default to tensors -6726:ff2d2388e7b9 # merge default to tensors -6742:0cbd6195e829 # merge default to tensors -6747:853d2bafeb8f # Generalized the gebp apis +#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products +#6639:c9121c60b5c7 +#6655:06f163b5221f # Properly detect FMA support on ARM +#6677:700e023044e7 # FMA has been wrongly disabled +#6681:11d31dafb0e3 +#6699:5e6e8e10aad1 # merge default to tensors +#6726:ff2d2388e7b9 # merge default to tensors +#6742:0cbd6195e829 # merge default to tensors +#6747:853d2bafeb8f # Generalized the gebp apis 6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation -6781:9cc5a931b2c6 # generalized gemv -6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product -6844:039efd86b75c # merge tensor +#6781:9cc5a931b2c6 # generalized gemv +#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product +#6844:039efd86b75c # merge tensor 6845:7333ed40c6ef # change prefetching in gebp -6856:b5be5e10eb7f # merge index conversion -6893:c3a64aba7c70 # clean blocking size computation -6898:6fb31ebe6492 # rotating kernel for ARM +#6856:b5be5e10eb7f # merge index conversion +#6893:c3a64aba7c70 # clean blocking size computation +#6898:6fb31ebe6492 # rotating kernel for ARM 6899:877facace746 # rotating kernel for ARM only -6904:c250623ae9fa # result_of +#6904:c250623ae9fa # result_of 6921:915f1b1fc158 # fix prefetching change for ARM 6923:9ff25f6dacc6 # prefetching -6933:52572e60b5d3 # blocking size strategy \ No newline at end of file +6933:52572e60b5d3 # blocking size strategy +6937:c8c042f286b2 # avoid redundant pack_rhs +6981:7e5d6f78da59 # dynamic loop swapping +6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache diff --git a/bench/perf_monitoring/gemm/run_gemm.sh b/bench/perf_monitoring/gemm/run_gemm.sh index d3a9fadc9..3fa6a3661 100755 --- a/bench/perf_monitoring/gemm/run_gemm.sh +++ b/bench/perf_monitoring/gemm/run_gemm.sh @@ -6,6 +6,7 @@ # Options: # -up : enforce the recomputation of existing data, and keep best results as a merging strategy +# -s : recompute selected changesets only and keep bests if echo "$*" | grep '\-up' > /dev/null; then @@ -14,14 +15,30 @@ else update=false fi -if [ $update == true ]; then +if echo "$*" | grep '\-s' > /dev/null; then + selected=true +else + selected=false +fi + +global_args="$*" + +if [ $selected == true ]; then + echo "Recompute selected changesets only and keep bests" +elif [ $update == true ]; then echo "(Re-)Compute all changesets and keep bests" else echo "Skip previously computed changesets" fi + + if [ ! -d "eigen_src" ]; then hg clone https://bitbucket.org/eigen/eigen eigen_src +else + cd eigen_src + hg pull -u + cd .. fi if [ ! -z '$CXX' ]; then @@ -61,17 +78,31 @@ function test_current scalar=$2 name=$3 - prev=`grep $rev "$name.backup" | cut -c 14-` + prev="" + if [ -e "$name.backup" ]; then + prev=`grep $rev "$name.backup" | cut -c 14-` + fi res=$prev count_rev=`echo $prev | wc -w` count_ref=`cat "settings.txt" | wc -l` - if [ $update == true ] || [ $count_rev != $count_ref ]; then + if echo "$global_args" | grep "$rev" > /dev/null; then + rev_found=true + else + rev_found=false + fi +# echo $update et $selected et $rev_found because $rev et "$global_args" +# echo $count_rev et $count_ref + if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src gemm.cpp -DSCALAR=$scalar -o $name; then curr=`./$name` - echo merge $prev - echo with $curr + if [ $count_rev == $count_ref ]; then + echo "merge previous $prev" + echo "with new $curr" + else + echo "got $curr" + fi res=`merge "$curr" "$prev"` - echo $res +# echo $res echo "$rev $res" >> $name.out else echo "Compilation failed, skip rev $rev" @@ -86,12 +117,12 @@ make_backup $PREFIX"sgemm" make_backup $PREFIX"dgemm" make_backup $PREFIX"cgemm" -cut -f1 -d"#" < changesets.txt | while read rev +cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev do if [ ! -z '$rev' ]; then echo "Testing rev $rev" cd eigen_src - hg up -C $rev + hg up -C $rev > /dev/null actual_rev=`hg identify | cut -f1 -d' '` cd .. diff --git a/bench/perf_monitoring/gemm/settings.txt b/bench/perf_monitoring/gemm/settings.txt index 6ef690708..5c43e1c7d 100644 --- a/bench/perf_monitoring/gemm/settings.txt +++ b/bench/perf_monitoring/gemm/settings.txt @@ -1,5 +1,6 @@ 8 8 8 9 9 9 +24 24 24 239 239 239 240 240 240 2400 24 24 @@ -8,4 +9,7 @@ 24 2400 2400 2400 24 2400 2400 2400 24 +2400 2400 64 +4800 23 160 +23 4800 160 2400 2400 2400 -- cgit v1.2.3