diff options
author | Gael Guennebaud <g.gael@free.fr> | 2008-08-04 23:12:48 +0000 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2008-08-04 23:12:48 +0000 |
commit | a7a05382d1c51964bf3ea0536c6ddd9cc9888b72 (patch) | |
tree | 413a9ecf342bf59ff685495eff70f999ee7803ac | |
parent | c2f8ecf46683adcab0db05199ee2ebe15e6ada4a (diff) |
Add a LU decomposition action in BTL and various cleaning in BTL. For instance
all per plot settings have been moved to a single file, go_mean now takes an
optional second argument "tiny" to generate plots for tiny matrices, and
output of comparison information wrt to previous benchs (if any).
40 files changed, 327 insertions, 109 deletions
diff --git a/bench/btl/CMakeLists.txt b/bench/btl/CMakeLists.txt index a7f90a2e9..8fb0ff889 100644 --- a/bench/btl/CMakeLists.txt +++ b/bench/btl/CMakeLists.txt @@ -9,7 +9,8 @@ OPTION(BTL_NOVEC "Disable SSE/Altivec optimizations when possible" OFF) SET(CMAKE_INCLUDE_CURRENT_DIR ON) -IF(CMAKE_COMPILER_IS_GNUCXX) +string(REGEX MATCH icpc IS_ICPC ${CMAKE_CXX_COMPILER}) +IF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC) SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG") SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG") IF(NOT BTL_NOVEC) @@ -18,7 +19,12 @@ IF(CMAKE_COMPILER_IS_GNUCXX) ELSE(NOT BTL_NOVEC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE") ENDIF(NOT BTL_NOVEC) -ENDIF(CMAKE_COMPILER_IS_GNUCXX) +ENDIF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC) + +if(IS_ICPC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fast") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fast") +endif(IS_ICPC) include_directories( ${PROJECT_SOURCE_DIR}/actions @@ -76,3 +82,4 @@ add_subdirectory(libs/STL_algo) add_subdirectory(data) + diff --git a/bench/btl/actions/action_atv_product.hh b/bench/btl/actions/action_atv_product.hh index b46bb6de8..e148bc86d 100644 --- a/bench/btl/actions/action_atv_product.hh +++ b/bench/btl/actions/action_atv_product.hh @@ -102,7 +102,7 @@ public : if (error>1.e-6){ INFOS("WRONG CALCULATION...residual=" << error); - exit(0); + exit(1); } } diff --git a/bench/btl/actions/action_axpby.hh b/bench/btl/actions/action_axpby.hh index 7a4f158c1..705447a21 100644 --- a/bench/btl/actions/action_axpby.hh +++ b/bench/btl/actions/action_axpby.hh @@ -85,7 +85,9 @@ public : } inline void calculate( void ) { + asm("#mybegin axpby"); Interface::axpby(_alpha,X,_beta,Y,_size); + asm("#myend axpby"); } void check_result( void ){ diff --git a/bench/btl/actions/action_axpy.hh b/bench/btl/actions/action_axpy.hh index 8740e413d..50759f6f6 100644 --- a/bench/btl/actions/action_axpy.hh +++ b/bench/btl/actions/action_axpy.hh @@ -96,7 +96,9 @@ public : } inline void calculate( void ) { - Interface::axpy(_coef,X,Y,_size); + asm("#mybegin axpy"); + Interface::axpy(_coef,X,Y,_size); + asm("#myend axpy"); } void check_result( void ){ diff --git a/bench/btl/actions/action_lu_decomp.hh b/bench/btl/actions/action_lu_decomp.hh new file mode 100644 index 000000000..cf455e87f --- /dev/null +++ b/bench/btl/actions/action_lu_decomp.hh @@ -0,0 +1,124 @@ +//===================================================== +// File : action_lu_decomp.hh +// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef ACTION_LU_DECOMP +#define ACTION_LU_DECOMP +#include "utilities.h" +#include "STL_interface.hh" +#include <string> +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template<class Interface> +class Action_lu_decomp { + +public : + + // Ctor + + Action_lu_decomp( int size ):_size(size) + { + MESSAGE("Action_lu_decomp Ctor"); + + // STL vector initialization + init_matrix<pseudo_random>(X_stl,_size); + + init_matrix<null_function>(C_stl,_size); + init_matrix<null_function>(resu_stl,_size); + + // generic matrix and vector initialization + Interface::matrix_from_stl(X_ref,X_stl); + Interface::matrix_from_stl(X,X_stl); + Interface::matrix_from_stl(C,C_stl); + + _cost = 2.0*size*size*size/3.0 + size*size; + } + + // invalidate copy ctor + + Action_lu_decomp( const Action_lu_decomp & ) + { + INFOS("illegal call to Action_lu_decomp Copy Ctor"); + exit(1); + } + + // Dtor + + ~Action_lu_decomp( void ){ + + MESSAGE("Action_lu_decomp Dtor"); + + // deallocation + Interface::free_matrix(X_ref,_size); + Interface::free_matrix(X,_size); + Interface::free_matrix(C,_size); + } + + // action name + + static inline std::string name( void ) + { + return "lu_decomp_"+Interface::name(); + } + + double nb_op_base( void ){ + return _cost; + } + + inline void initialize( void ){ + Interface::copy_matrix(X_ref,X,_size); + } + + inline void calculate( void ) { + Interface::lu_decomp(X,C,_size); + } + + void check_result( void ){ + // calculation check + Interface::matrix_to_stl(C,resu_stl); + +// STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size); +// +// typename Interface::real_type error= +// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl); +// +// if (error>1.e-6){ +// INFOS("WRONG CALCULATION...residual=" << error); +// exit(0); +// } + + } + +private : + + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix C_stl; + typename Interface::stl_matrix resu_stl; + + typename Interface::gene_matrix X_ref; + typename Interface::gene_matrix X; + typename Interface::gene_matrix C; + + int _size; + int _cost; +}; + +#endif diff --git a/bench/btl/actions/action_matrix_matrix_product.hh b/bench/btl/actions/action_matrix_matrix_product.hh index 9de677a7f..048594c70 100644 --- a/bench/btl/actions/action_matrix_matrix_product.hh +++ b/bench/btl/actions/action_matrix_matrix_product.hh @@ -120,7 +120,7 @@ public : if (error>1.e-6){ INFOS("WRONG CALCULATION...residual=" << error); - exit(1); +// exit(1); } } diff --git a/bench/btl/actions/action_matrix_vector_product.hh b/bench/btl/actions/action_matrix_vector_product.hh index 202bc1a85..272e9d417 100644 --- a/bench/btl/actions/action_matrix_vector_product.hh +++ b/bench/btl/actions/action_matrix_vector_product.hh @@ -121,7 +121,7 @@ public : if (error>1.e-5){ INFOS("WRONG CALCULATION...residual=" << error); -// exit(0); + exit(0); } } diff --git a/bench/btl/data/CMakeLists.txt b/bench/btl/data/CMakeLists.txt index 345e44fad..6af2a366f 100644 --- a/bench/btl/data/CMakeLists.txt +++ b/bench/btl/data/CMakeLists.txt @@ -1,9 +1,8 @@ ADD_CUSTOM_TARGET(copy_scripts) -SET(script_files go_mean aat.hh ata.hh axpy.hh axpby.hh cholesky.hh trisolve.hh hessenberg.hh tridiagonalization.hh - order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh - matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh ) +SET(script_files go_mean mk_mean_script.sh mk_new_gnuplot.sh + perlib_plot_settings.txt action_settings.txt gnuplot_common_settings.hh ) FOREACH(script_file ${script_files}) ADD_CUSTOM_COMMAND( diff --git a/bench/btl/data/aat.hh b/bench/btl/data/aat.hh deleted file mode 100644 index 5751d768a..000000000 --- a/bench/btl/data/aat.hh +++ /dev/null @@ -1,4 +0,0 @@ -set title "{/*1.5 A x A^T}" -set xlabel "matrix size" 0.000000,0.000000 -set xrange [4:1024] - diff --git a/bench/btl/data/action_settings.txt b/bench/btl/data/action_settings.txt new file mode 100644 index 000000000..26557279b --- /dev/null +++ b/bench/btl/data/action_settings.txt @@ -0,0 +1,12 @@ +aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:1024 +ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:1024 +atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:1024 +axpby ; "{/*1.5 Y = alpha * X + beta * Y}" ; "vector size" ; 5:1000000 +axpy ; "{/*1.5 Y += alpha * X}" ; "vector size" ; 5:1000000 +matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:1024 +matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:1024 +trisolve ; "{/*1.5 triangular solver (X = inv(L) * X)}" ; "size" ; 4:1024 +cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:1024 +lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:1024 +tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:1024 +hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:1024
\ No newline at end of file diff --git a/bench/btl/data/ata.hh b/bench/btl/data/ata.hh deleted file mode 100644 index 9a87d2983..000000000 --- a/bench/btl/data/ata.hh +++ /dev/null @@ -1,4 +0,0 @@ -set title "{/*1.5 A^T x A}" -set xlabel "matrix size" 0.000000,0.000000 -set xrange [4:1024] - diff --git a/bench/btl/data/atv.hh b/bench/btl/data/atv.hh deleted file mode 100644 index a8e36a5f1..000000000 --- a/bench/btl/data/atv.hh +++ /dev/null @@ -1,3 +0,0 @@ -set title "{/*1.5 matrix^T x vector}" 0.000000,0.000000 -set xlabel "matrix size" 0.000000,0.000000 -set xrange [4:1024] diff --git a/bench/btl/data/axpby.hh b/bench/btl/data/axpby.hh deleted file mode 100644 index 84897d8e2..000000000 --- a/bench/btl/data/axpby.hh +++ /dev/null @@ -1,3 +0,0 @@ -set title "{/*1.5 Y = alpha * X + beta * Y}" 0.000000,0.000000 -set xlabel "vector size" 0.000000,0.000000 -set xrange [5:1000000] diff --git a/bench/btl/data/axpy.hh b/bench/btl/data/axpy.hh deleted file mode 100644 index d3fe8df69..000000000 --- a/bench/btl/data/axpy.hh +++ /dev/null @@ -1,3 +0,0 @@ -set title "{/*1.5 Y += alpha * X}" 0.000000,0.000000 -set xlabel "vector size" 0.000000,0.000000 -set xrange [5:1000000] diff --git a/bench/btl/data/cholesky.hh b/bench/btl/data/cholesky.hh deleted file mode 100644 index 78336b4ee..000000000 --- a/bench/btl/data/cholesky.hh +++ /dev/null @@ -1,3 +0,0 @@ -set title "{/*1.5 Cholesky decomposition}" 0.000000,0.000000 -set xlabel "matrix size" 0.000000,0.000000 -set xrange [6:1000] diff --git a/bench/btl/data/go_mean b/bench/btl/data/go_mean index 7ca94e091..f8edf43db 100755 --- a/bench/btl/data/go_mean +++ b/bench/btl/data/go_mean @@ -5,7 +5,9 @@ mkdir -p $1 EIGENDIR=`cat eigen_root_dir.txt` webpagefilename=$1/index.html -#cp header.html $webpagefilename +meanstatsfilename=$1/mean.html + +echo '' > $meanstatsfilename echo '' > $webpagefilename echo '<p><strong>Configuration</strong>' >> $webpagefilename echo '<ul>'\ @@ -16,17 +18,19 @@ echo '<ul>'\ '</ul>' \ '</p>' >> $webpagefilename -source mk_mean_script.sh axpy $1 11 2500 100000 250000 > $1/axpy.html -source mk_mean_script.sh axpby $1 11 2500 100000 250000 > $1/axpby.html -source mk_mean_script.sh matrix_vector $1 11 50 300 1000 > $1/matrix_vector.html -source mk_mean_script.sh atv $1 11 50 300 1000 > $1/atv.html -source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 > $1/matrix_matrix.html -source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html -source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html -source mk_mean_script.sh trisolve $1 11 100 300 1000 > $1/trisolve.html -source mk_mean_script.sh cholesky $1 11 100 300 1000 > $1/cholesky.html -source mk_mean_script.sh hessenberg $1 11 100 300 1000 > $1/hessenberg.html -source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 > $1/tridiagonalization.html +source mk_mean_script.sh axpy $1 11 2500 100000 250000 $2 +source mk_mean_script.sh axpby $1 11 2500 100000 250000 $2 +source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $2 +source mk_mean_script.sh atv $1 11 50 300 1000 $2 +source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $2 +source mk_mean_script.sh aat $1 11 100 300 1000 $2 +source mk_mean_script.sh ata $1 11 100 300 1000 $2 +source mk_mean_script.sh trisolve $1 11 100 300 1000 $2 +source mk_mean_script.sh cholesky $1 11 100 300 1000 $2 +source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $2 +source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $2 +source mk_mean_script.sh hessenberg $1 11 100 300 1000 $2 + ## compile the web page ## diff --git a/bench/btl/data/hessenberg.hh b/bench/btl/data/hessenberg.hh deleted file mode 100644 index 24d889f68..000000000 --- a/bench/btl/data/hessenberg.hh +++ /dev/null @@ -1,3 +0,0 @@ -set title "{/*1.5 Hessenberg decomposition}" 0.000000,0.000000 -set xlabel "matrix size" 0.000000,0.000000 -set xrange [6:1000] diff --git a/bench/btl/data/matrix_matrix.hh b/bench/btl/data/matrix_matrix.hh deleted file mode 100644 index 24f3c108d..000000000 --- a/bench/btl/data/matrix_matrix.hh +++ /dev/null @@ -1,3 +0,0 @@ -set title "{/*1.5 matrix matrix product}" -set xlabel "matrix size" 0.000000,0.000000 -set xrange [4:1024] diff --git a/bench/btl/data/matrix_vector.hh b/bench/btl/data/matrix_vector.hh deleted file mode 100644 index 137f893ab..000000000 --- a/bench/btl/data/matrix_vector.hh +++ /dev/null @@ -1,3 +0,0 @@ -set title "{/*1.5 matrix vector product}" 0.000000,0.000000 -set xlabel "matrix size" 0.000000,0.000000 -set xrange [4:1024] diff --git a/bench/btl/data/mk_mean_script.sh b/bench/btl/data/mk_mean_script.sh index 3d9fa1d58..baa0fd9df 100644 --- a/bench/btl/data/mk_mean_script.sh +++ b/bench/btl/data/mk_mean_script.sh @@ -6,12 +6,14 @@ MAXIC=$4 MINOC=$5 MAXOC=$6 +meanstatsfilename=$2/mean.html + WORK_DIR=tmp mkdir $WORK_DIR DATA_FILE=`find $DIR -name "*.dat" | grep _${WHAT}` echo "" -echo $"1..." +echo "$1..." for FILE in $DATA_FILE do ##echo hello world @@ -24,13 +26,15 @@ do done cd $WORK_DIR -../main $1 $3 $4 $5 $6 * -../mk_new_gnuplot.sh $1 $2 +../main $1 $3 $4 $5 $6 * >> ../$meanstatsfilename +../mk_new_gnuplot.sh $1 $2 $7 rm -f *.gnuplot cd .. rm -R $WORK_DIR +echo '<br/>' >> $meanstatsfilename + webpagefilename=$2/index.html # echo '<h3>'${WHAT}'</h3>' >> $webpagefilename echo '<hr/><a href="/btl/'$1'.pdf"><img src="/btl/'$1'.png" alt="'${WHAT}'" /></a><br/>' >> $webpagefilename diff --git a/bench/btl/data/mk_new_gnuplot.sh b/bench/btl/data/mk_new_gnuplot.sh index e6c35597a..96a2a4e1a 100755 --- a/bench/btl/data/mk_new_gnuplot.sh +++ b/bench/btl/data/mk_new_gnuplot.sh @@ -1,14 +1,30 @@ -#! /bin/bash +#!/bin/bash WHAT=$1 DIR=$2 cat ../gnuplot_common_settings.hh > ${WHAT}.gnuplot -cat ../${WHAT}.hh >> ${WHAT}.gnuplot + +echo "set title " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 2` >> $WHAT.gnuplot +echo "set xlabel " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 3` "0.000000,0.000000" >> $WHAT.gnuplot +echo "set xrange [" `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 4` "]" >> $WHAT.gnuplot + +if [ $# > 3 ]; then + if [ $3 == "tiny" ]; then + echo "set xrange [2:16]" >> $WHAT.gnuplot + echo "set nologscale" >> $WHAT.gnuplot + fi +fi + + DATA_FILE=`cat ../order_lib` echo set term postscript color rounded enhanced >> $WHAT.gnuplot echo set output "'"../${DIR}/$WHAT.ps"'" >> $WHAT.gnuplot +# echo set term svg color rounded enhanced >> $WHAT.gnuplot +# echo "set terminal svg enhanced size 1000 1000 fname \"Times\" fsize 36" >> $WHAT.gnuplot +# echo set output "'"../${DIR}/$WHAT.svg"'" >> $WHAT.gnuplot + echo plot \\ >> $WHAT.gnuplot for FILE in $DATA_FILE @@ -34,3 +50,5 @@ rm $WHAT.gnuplot ps2pdf ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.pdf convert -density 120 -rotate 90 -resize 800 +dither -colors 48 -quality 0 ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.png + +# pstoedit -rotate -90 -xscale 0.8 -yscale 0.8 -centered -yshift -50 -xshift -100 -f plot-svg aat.ps aat2.svg diff --git a/bench/btl/data/order_lib b/bench/btl/data/order_lib deleted file mode 100644 index ccbaab12e..000000000 --- a/bench/btl/data/order_lib +++ /dev/null @@ -1,11 +0,0 @@ -eigen2_SSE -eigen2 -INTEL_MKL -ATLAS -STL -C -gmm -mtl4 -ublas -blitz -F77 diff --git a/bench/btl/data/tridiagonalization.hh b/bench/btl/data/tridiagonalization.hh deleted file mode 100644 index 9124398f0..000000000 --- a/bench/btl/data/tridiagonalization.hh +++ /dev/null @@ -1,3 +0,0 @@ -set title "{/*1.5 Tridiagonalization}" 0.000000,0.000000 -set xlabel "matrix size" 0.000000,0.000000 -set xrange [6:1000] diff --git a/bench/btl/data/trisolve.hh b/bench/btl/data/trisolve.hh deleted file mode 100644 index 6c3000f2a..000000000 --- a/bench/btl/data/trisolve.hh +++ /dev/null @@ -1,3 +0,0 @@ -set title "{/*1.5 triangular solver (X = inv(L) * X)}" 0.000000,0.000000 -set xlabel "matrix-vector size" 0.000000,0.000000 -set xrange [6:1000] diff --git a/bench/btl/generic_bench/bench.hh b/bench/btl/generic_bench/bench.hh index bb3ca6667..005c36395 100644 --- a/bench/btl/generic_bench/bench.hh +++ b/bench/btl/generic_bench/bench.hh @@ -53,13 +53,15 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ) std::vector<int> tab_sizes(nb_point); // matrices and vector size calculations - size_lin_log(nb_point,size_min,size_max,tab_sizes); - // loop on matrix size + std::vector<int> oldSizes; + std::vector<double> oldFlops; + bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true); + int oldi = oldSizes.size() - 1; + // loop on matrix size Perf_Analyzer<Action> perf_action; - for (int i=nb_point-1;i>=0;i--) { //INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")"); @@ -74,14 +76,28 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ) #endif tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]); - std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl; + std::cout << tab_mflops[i]; + + if (hasOldResults) + { + while (oldi>=0 && oldSizes[oldi]>tab_sizes[i]) + --oldi; + if (oldi>=0 && oldSizes[oldi]==tab_sizes[i]) + { + if (oldFlops[oldi]<tab_mflops[i]) + std::cout << "\t > "; + else + std::cout << "\t < "; + std::cout << oldFlops[oldi]; + } + --oldi; + } + std::cout << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl; } if (!BtlConfig::Instance.overwriteResults) { - std::vector<int> oldSizes; - std::vector<double> oldFlops; - if (read_xy_file(filename, oldSizes, oldFlops, true)) + if (hasOldResults) { // merge the two data std::vector<int> newSizes; diff --git a/bench/btl/generic_bench/static/bench_static.hh b/bench/btl/generic_bench/static/bench_static.hh index 730590c67..23b55ecff 100644 --- a/bench/btl/generic_bench/static/bench_static.hh +++ b/bench/btl/generic_bench/static/bench_static.hh @@ -27,8 +27,8 @@ #include "xy_file.hh" #include "static/static_size_generator.hh" #include "timers/portable_perf_analyzer.hh" -#include "timers/mixed_perf_analyzer.hh" -#include "timers/x86_perf_analyzer.hh" +// #include "timers/mixed_perf_analyzer.hh" +// #include "timers/x86_perf_analyzer.hh" using namespace std; @@ -50,7 +50,7 @@ BTL_DONT_INLINE void bench_static(void) static_size_generator<max_size,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops); - dump_file_x_y(tab_sizes,tab_mflops,filename); + dump_xy_file(tab_sizes,tab_mflops,filename); } // default Perf Analyzer diff --git a/bench/btl/generic_bench/static/static_size_generator.hh b/bench/btl/generic_bench/static/static_size_generator.hh index 50f063bc0..dd02df3f1 100644 --- a/bench/btl/generic_bench/static/static_size_generator.hh +++ b/bench/btl/generic_bench/static/static_size_generator.hh @@ -27,12 +27,13 @@ using namespace std; template <int SIZE,template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface> struct static_size_generator{ - static void go(vector<double> & tab_sizes, vector<double> & tab_mflops) + static void go(vector<double> & tab_sizes, vector<double> & tab_mflops) { tab_sizes.push_back(SIZE); - + std::cout << tab_sizes.back() << " \t" << std::flush; Perf_Analyzer<Action<Interface<REAL_TYPE,SIZE> > > perf_action; tab_mflops.push_back(perf_action.eval_mflops(SIZE)); + std::cout << tab_mflops.back() << " MFlops" << std::endl; static_size_generator<SIZE-1,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops); }; }; diff --git a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh index a3ea73740..21d9bde1a 100644 --- a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh +++ b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh @@ -50,6 +50,11 @@ void scopy_(const int *n, const float *x, const int *incx, float *y, const int * void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info); void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work, int *lwork, int *info ); void sgehrd_( const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork, int *info ); + + // LU row pivoting + void sgetrf_(const int* m, const int* n, float *a, const int* ld, int* ipivot, int* info); + // LU full pivoting + void sgetc2_(const int* n, float *a, const int *lda, int *ipiv, int *jpiv, int*info ); #ifdef HAS_LAPACK #endif } @@ -193,7 +198,8 @@ public : } static inline void cholesky(const gene_vector & X, gene_vector & C, int N){ - cblas_scopy(N*N, X, 1, C, 1); + int N2 = N*N; + scopy_(&N2, X, &intone, C, &intone); char uplo = 'L'; int info = 0; spotrf_(&uplo, &N, C, &N, &info); @@ -201,6 +207,16 @@ public : #ifdef HAS_LAPACK + static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ + int N2 = N*N; + scopy_(&N2, X, &intone, C, &intone); + char uplo = 'L'; + int info = 0; + int * ipiv = (int*)alloca(sizeof(int)*N); + int * jpiv = (int*)alloca(sizeof(int)*N); + sgetc2_(&N, C, &N, ipiv, jpiv, &info); + } + static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){ cblas_scopy(N*N, X, 1, C, 1); int info = 0; diff --git a/bench/btl/libs/C_BLAS/main.cpp b/bench/btl/libs/C_BLAS/main.cpp index 81b350325..21da0b4aa 100644 --- a/bench/btl/libs/C_BLAS/main.cpp +++ b/bench/btl/libs/C_BLAS/main.cpp @@ -23,6 +23,7 @@ #include "basic_actions.hh" #include "action_cholesky.hh" +#include "action_lu_decomp.hh" #ifdef HAS_LAPACK #include "action_hessenberg.hh" @@ -45,9 +46,10 @@ int main() bench<Action_trisolve<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); -// bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); - + bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + #ifdef HAS_LAPACK + bench<Action_lu_decomp<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_hessenberg<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_tridiagonalization<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); #endif diff --git a/bench/btl/libs/eigen2/CMakeLists.txt b/bench/btl/libs/eigen2/CMakeLists.txt index f452dc901..3c511f9b8 100644 --- a/bench/btl/libs/eigen2/CMakeLists.txt +++ b/bench/btl/libs/eigen2/CMakeLists.txt @@ -1,4 +1,4 @@ - +find_package(MKL) find_package(Eigen2) if (EIGEN2_FOUND) @@ -25,6 +25,11 @@ if (EIGEN2_FOUND) if(BUILD_btl_eigen2_novec_adv) set_target_properties(btl_eigen2_novec_adv PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE") endif(BUILD_btl_eigen2_novec_adv) + + if(BUILD_btl_eigen2_adv) + target_link_libraries(btl_eigen2_adv ${MKL_LIBRARIES}) + endif(BUILD_btl_eigen2_adv) + ENDIF(NOT BTL_NOVEC) btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF) diff --git a/bench/btl/libs/eigen2/btl_tiny_eigen2.cpp b/bench/btl/libs/eigen2/btl_tiny_eigen2.cpp index 26c448df0..d256504f5 100644 --- a/bench/btl/libs/eigen2/btl_tiny_eigen2.cpp +++ b/bench/btl/libs/eigen2/btl_tiny_eigen2.cpp @@ -25,6 +25,8 @@ #include "action_ata_product.hh" #include "action_aat_product.hh" #include "action_atv_product.hh" +#include "action_cholesky.hh" +#include "action_trisolve.hh" BTL_MAIN; @@ -35,6 +37,8 @@ int main() bench_static<Action_matrix_matrix_product,eigen2_interface>(); bench_static<Action_matrix_vector_product,eigen2_interface>(); bench_static<Action_atv_product,eigen2_interface>(); + bench_static<Action_cholesky,eigen2_interface>(); + bench_static<Action_trisolve,eigen2_interface>(); return 0; } diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh index 5a85be54d..64314ab3b 100644 --- a/bench/btl/libs/eigen2/eigen2_interface.hh +++ b/bench/btl/libs/eigen2/eigen2_interface.hh @@ -17,9 +17,10 @@ // #ifndef EIGEN2_INTERFACE_HH #define EIGEN2_INTERFACE_HH - +#include <cblas.h> #include <Eigen/Core> #include <Eigen/Cholesky> +#include <Eigen/LU> #include <Eigen/QR> #include <vector> #include "btl.hh" @@ -138,16 +139,25 @@ public : static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){ C = X.cholesky().matrixL(); +// C = X; +// Cholesky<gene_matrix>::computeInPlace(C); +// Cholesky<gene_matrix>::computeInPlaceBlock(C); } - static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){ - C = HessenbergDecomposition<gene_matrix>(X).packedMatrix(); + static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ + C = X.lu().matrixLU(); } static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ C = Tridiagonalization<gene_matrix>(X).packedMatrix(); } + static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){ + C = HessenbergDecomposition<gene_matrix>(X).packedMatrix(); + } + + + }; #endif diff --git a/bench/btl/libs/eigen2/main_adv.cpp b/bench/btl/libs/eigen2/main_adv.cpp index 243edb98f..83e48f58e 100644 --- a/bench/btl/libs/eigen2/main_adv.cpp +++ b/bench/btl/libs/eigen2/main_adv.cpp @@ -21,6 +21,7 @@ #include "action_trisolve.hh" #include "action_cholesky.hh" #include "action_hessenberg.hh" +#include "action_lu_decomp.hh" BTL_MAIN; @@ -28,6 +29,7 @@ int main() { bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_lu_decomp<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_hessenberg<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_tridiagonalization<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); diff --git a/bench/btl/libs/gmm/gmm_interface.hh b/bench/btl/libs/gmm/gmm_interface.hh index 93f827f9b..cfcf537ac 100644 --- a/bench/btl/libs/gmm/gmm_interface.hh +++ b/bench/btl/libs/gmm/gmm_interface.hh @@ -123,14 +123,20 @@ public : gmm::lower_tri_solve(L, X, false); } - static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){ - gmm::copy(X,C); - gmm::Hessenberg_reduction(C,X,false); + static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){ + gmm::copy(X,R); + std::vector<int> ipvt(N); + gmm::lu_factor(R, ipvt); } - static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ - gmm::copy(X,C); - gmm::Householder_tridiagonalization(C,X,false); + static inline void hessenberg(const gene_matrix & X, gene_matrix & R, int N){ + gmm::copy(X,R); + gmm::Hessenberg_reduction(R,X,false); + } + + static inline void tridiagonalization(const gene_matrix & X, gene_matrix & R, int N){ + gmm::copy(X,R); + gmm::Householder_tridiagonalization(R,X,false); } }; diff --git a/bench/btl/libs/gmm/main.cpp b/bench/btl/libs/gmm/main.cpp index b7c36fc9a..d5aebcdbf 100644 --- a/bench/btl/libs/gmm/main.cpp +++ b/bench/btl/libs/gmm/main.cpp @@ -20,6 +20,7 @@ #include "bench.hh" #include "basic_actions.hh" #include "action_hessenberg.hh" +#include "action_lu_decomp.hh" BTL_MAIN; @@ -39,6 +40,8 @@ int main() bench<Action_trisolve<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); //bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT); + bench<Action_lu_decomp<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_hessenberg<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_tridiagonalization<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); diff --git a/bench/btl/libs/hand_vec/hand_vec_interface.hh b/bench/btl/libs/hand_vec/hand_vec_interface.hh index fb2e61c75..66babcc9e 100755 --- a/bench/btl/libs/hand_vec/hand_vec_interface.hh +++ b/bench/btl/libs/hand_vec/hand_vec_interface.hh @@ -714,17 +714,36 @@ public : { Packet pcoef = ei_pset1(coef); #ifdef PEELING - int ANP = (AN/(8*PacketSize))*8*PacketSize; - for (int j = 0;j<ANP;j+=PacketSize*8) + const int peelSize = 3; + int ANP = (AN/(peelSize*PacketSize))*peelSize*PacketSize; + float* X1 = X + PacketSize; + float* Y1 = Y + PacketSize; + float* X2 = X + 2*PacketSize; + float* Y2 = Y + 2*PacketSize; + Packet x0,x1,x2,y0,y1,y2; + for (int j = 0;j<ANP;j+=PacketSize*peelSize) { - ei_pstore(&Y[j ], ei_padd(ei_pload(&Y[j ]), ei_pmul(pcoef,ei_pload(&X[j ])))); - ei_pstore(&Y[j+ PacketSize], ei_padd(ei_pload(&Y[j+ PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+ PacketSize])))); - ei_pstore(&Y[j+2*PacketSize], ei_padd(ei_pload(&Y[j+2*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+2*PacketSize])))); - ei_pstore(&Y[j+3*PacketSize], ei_padd(ei_pload(&Y[j+3*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+3*PacketSize])))); - ei_pstore(&Y[j+4*PacketSize], ei_padd(ei_pload(&Y[j+4*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+4*PacketSize])))); - ei_pstore(&Y[j+5*PacketSize], ei_padd(ei_pload(&Y[j+5*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+5*PacketSize])))); - ei_pstore(&Y[j+6*PacketSize], ei_padd(ei_pload(&Y[j+6*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+6*PacketSize])))); - ei_pstore(&Y[j+7*PacketSize], ei_padd(ei_pload(&Y[j+7*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+7*PacketSize])))); + x0 = ei_pload(X+j); + x1 = ei_pload(X1+j); + x2 = ei_pload(X2+j); + + y0 = ei_pload(Y+j); + y1 = ei_pload(Y1+j); + y2 = ei_pload(Y2+j); + + y0 = ei_pmadd(pcoef, x0, y0); + y1 = ei_pmadd(pcoef, x1, y1); + y2 = ei_pmadd(pcoef, x2, y2); + + ei_pstore(Y+j, y0); + ei_pstore(Y1+j, y1); + ei_pstore(Y2+j, y2); +// ei_pstore(&Y[j+2*PacketSize], ei_padd(ei_pload(&Y[j+2*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+2*PacketSize])))); +// ei_pstore(&Y[j+3*PacketSize], ei_padd(ei_pload(&Y[j+3*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+3*PacketSize])))); +// ei_pstore(&Y[j+4*PacketSize], ei_padd(ei_pload(&Y[j+4*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+4*PacketSize])))); +// ei_pstore(&Y[j+5*PacketSize], ei_padd(ei_pload(&Y[j+5*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+5*PacketSize])))); +// ei_pstore(&Y[j+6*PacketSize], ei_padd(ei_pload(&Y[j+6*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+6*PacketSize])))); +// ei_pstore(&Y[j+7*PacketSize], ei_padd(ei_pload(&Y[j+7*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+7*PacketSize])))); } for (int j = ANP;j<AN;j+=PacketSize) ei_pstore(&Y[j], ei_padd(ei_pload(&Y[j]), ei_pmul(pcoef,ei_pload(&X[j])))); diff --git a/bench/btl/libs/hand_vec/main.cpp b/bench/btl/libs/hand_vec/main.cpp index 19d45b2f1..eea8cb88b 100644 --- a/bench/btl/libs/hand_vec/main.cpp +++ b/bench/btl/libs/hand_vec/main.cpp @@ -27,7 +27,7 @@ #include "action_ata_product.hh" #include "action_aat_product.hh" //#include "action_lu_solve.hh" -#include "timers/mixed_perf_analyzer.hh" +// #include "timers/mixed_perf_analyzer.hh" BTL_MAIN; diff --git a/bench/btl/libs/mtl4/main.cpp b/bench/btl/libs/mtl4/main.cpp index 94c81beac..d671e7ee9 100644 --- a/bench/btl/libs/mtl4/main.cpp +++ b/bench/btl/libs/mtl4/main.cpp @@ -20,6 +20,7 @@ #include "bench.hh" #include "basic_actions.hh" #include "action_cholesky.hh" +// #include "action_lu_decomp.hh" BTL_MAIN; @@ -37,6 +38,7 @@ int main() bench<Action_trisolve<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_cholesky<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); +// bench<Action_lu_decomp<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); return 0; } diff --git a/bench/btl/libs/mtl4/mtl4_interface.hh b/bench/btl/libs/mtl4/mtl4_interface.hh index a299fa83e..c08b35645 100644 --- a/bench/btl/libs/mtl4/mtl4_interface.hh +++ b/bench/btl/libs/mtl4/mtl4_interface.hh @@ -120,6 +120,12 @@ public : recursive_cholesky(C); } +// static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){ +// R = X; +// std::vector<int> ipvt(N); +// lu_factor(R, ipvt); +// } + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ X = lower_trisolve(L, B); } diff --git a/bench/btl/libs/tvmet/tvmet_interface.hh b/bench/btl/libs/tvmet/tvmet_interface.hh index 28aa35300..b441ada21 100644 --- a/bench/btl/libs/tvmet/tvmet_interface.hh +++ b/bench/btl/libs/tvmet/tvmet_interface.hh @@ -41,7 +41,7 @@ public : typedef Vector<real,SIZE> gene_vector; typedef Matrix<real,SIZE,SIZE> gene_matrix; - static inline std::string name() { return "tvmet"; } + static inline std::string name() { return "tiny_tvmet"; } static void free_matrix(gene_matrix & A, int N){} |