diff options
author | Gael Guennebaud <g.gael@free.fr> | 2008-07-27 11:39:47 +0000 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2008-07-27 11:39:47 +0000 |
commit | 93115619c23bb41fd24b0090cb6adec501edaced (patch) | |
tree | 60ae0887b0705b8a994f8ef9baa5324c87883861 /bench/btl | |
parent | e9e5261664cc77049f8b77a2c36c535fbd44889c (diff) |
* updated benchmark files according to recent renamings
* various improvements in BTL including trisolver and cholesky bench
Diffstat (limited to 'bench/btl')
40 files changed, 1182 insertions, 250 deletions
diff --git a/bench/btl/CMakeLists.txt b/bench/btl/CMakeLists.txt index 600fc38f1..a7f90a2e9 100644 --- a/bench/btl/CMakeLists.txt +++ b/bench/btl/CMakeLists.txt @@ -26,6 +26,11 @@ include_directories( ${PROJECT_SOURCE_DIR}/generic_bench/utils ${PROJECT_SOURCE_DIR}/libs/STL) +# find_package(MKL) +# if (MKL_FOUND) +# add_definitions(-DHAVE_MKL) +# set(DEFAULT_LIBRARIES ${MKL_LIBRARIES}) +# endif (MKL_FOUND) MACRO(BTL_ADD_BENCH targetname) @@ -49,6 +54,7 @@ MACRO(BTL_ADD_BENCH targetname) IF(BUILD_${targetname}) ADD_EXECUTABLE(${targetname} ${_sources}) ADD_TEST(${targetname} "${targetname}") + target_link_libraries(${targetname} ${DEFAULT_LIBRARIES}) ENDIF(BUILD_${targetname}) ENDMACRO(BTL_ADD_BENCH) diff --git a/bench/btl/README b/bench/btl/README index 6d5712bb6..787002f9a 100644 --- a/bench/btl/README +++ b/bench/btl/README @@ -29,7 +29,18 @@ BTL uses cmake / ctest: $ ctest -V -You can also run a single bench, e.g.: ctest -V -R eigen +You can run the benchmarks only on libraries matching a given regular expression: + ctest -V -R <regexp> +For instance: + ctest -V -R eigen2 + +You can also select a given set of actions defining the environment variable BTL_CONFIG this way: + BTL_CONFIG="-a action1{:action2}*" ctest -V +An exemple: + BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2 + +Finally, if bench results already exist (the bench*.dat files) then they merges by keeping the best for each matrix size. If you want to overwrite the previous ones you can simply add the "--overwrite" option: + BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2 4 : Analyze the result. different data files (.dat) are produced in each libs directories. If gnuplot is available, choose a directory name in the data directory to store the results and type diff --git a/bench/btl/actions/action_axpby.hh b/bench/btl/actions/action_axpby.hh new file mode 100644 index 000000000..7a4f158c1 --- /dev/null +++ b/bench/btl/actions/action_axpby.hh @@ -0,0 +1,124 @@ +//===================================================== +// File : action_axpby.hh +// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef ACTION_AXPBY +#define ACTION_AXPBY +#include "utilities.h" +#include "STL_interface.hh" +#include <string> +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template<class Interface> +class Action_axpby { + +public : + + // Ctor + Action_axpby( int size ):_size(size),_alpha(0.5),_beta(0.95) + { + MESSAGE("Action_axpby Ctor"); + + // STL vector initialization + init_vector<pseudo_random>(X_stl,_size); + init_vector<pseudo_random>(Y_stl,_size); + init_vector<null_function>(resu_stl,_size); + + // generic matrix and vector initialization + Interface::vector_from_stl(X_ref,X_stl); + Interface::vector_from_stl(Y_ref,Y_stl); + + Interface::vector_from_stl(X,X_stl); + Interface::vector_from_stl(Y,Y_stl); + } + + // invalidate copy ctor + Action_axpby( const Action_axpby & ) + { + INFOS("illegal call to Action_axpby Copy Ctor"); + exit(1); + } + + // Dtor + ~Action_axpby( void ){ + MESSAGE("Action_axpby Dtor"); + + // deallocation + Interface::free_vector(X_ref); + Interface::free_vector(Y_ref); + + Interface::free_vector(X); + Interface::free_vector(Y); + } + + // action name + static inline std::string name( void ) + { + return "axpby_"+Interface::name(); + } + + double nb_op_base( void ){ + return 3.0*_size; + } + + inline void initialize( void ){ + Interface::copy_vector(X_ref,X,_size); + Interface::copy_vector(Y_ref,Y,_size); + } + + inline void calculate( void ) { + Interface::axpby(_alpha,X,_beta,Y,_size); + } + + void check_result( void ){ + // calculation check + Interface::vector_to_stl(Y,resu_stl); + + STL_interface<typename Interface::real_type>::axpby(_alpha,X_stl,_beta,Y_stl,_size); + + typename Interface::real_type error= + STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl); + + if (error>1.e-6){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(2); + } + } + +private : + + typename Interface::stl_vector X_stl; + typename Interface::stl_vector Y_stl; + typename Interface::stl_vector resu_stl; + + typename Interface::gene_vector X_ref; + typename Interface::gene_vector Y_ref; + + typename Interface::gene_vector X; + typename Interface::gene_vector Y; + + typename Interface::real_type _alpha; + typename Interface::real_type _beta; + + int _size; +}; + +#endif diff --git a/bench/btl/actions/action_cholesky.hh b/bench/btl/actions/action_cholesky.hh new file mode 100644 index 000000000..67495b9cb --- /dev/null +++ b/bench/btl/actions/action_cholesky.hh @@ -0,0 +1,132 @@ +//===================================================== +// File : action_cholesky.hh +// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef ACTION_CHOLESKY +#define ACTION_CHOLESKY +#include "utilities.h" +#include "STL_interface.hh" +#include <string> +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template<class Interface> +class Action_cholesky { + +public : + + // Ctor + + Action_cholesky( int size ):_size(size) + { + MESSAGE("Action_cholesky Ctor"); + + // STL vector initialization + typename Interface::stl_matrix tmp; + init_matrix<pseudo_random>(tmp,_size); + init_matrix<null_function>(X_stl,_size); + STL_interface<typename Interface::real_type>::ata_product(tmp,X_stl,_size); + + init_matrix<null_function>(C_stl,_size); + init_matrix<null_function>(resu_stl,_size); + + // generic matrix and vector initialization + Interface::matrix_from_stl(X_ref,X_stl); + Interface::matrix_from_stl(X,X_stl); + Interface::matrix_from_stl(C,C_stl); + + _cost = 0; + for (int j=0; j<_size; ++j) + { + int r = std::max(_size - j -1,0); + _cost += 2*(r*j+r+j); + } + } + + // invalidate copy ctor + + Action_cholesky( const Action_cholesky & ) + { + INFOS("illegal call to Action_cholesky Copy Ctor"); + exit(1); + } + + // Dtor + + ~Action_cholesky( void ){ + + MESSAGE("Action_cholesky Dtor"); + + // deallocation + Interface::free_matrix(X_ref,_size); + Interface::free_matrix(X,_size); + Interface::free_matrix(C,_size); + } + + // action name + + static inline std::string name( void ) + { + return "cholesky_"+Interface::name(); + } + + double nb_op_base( void ){ + return _cost; + } + + inline void initialize( void ){ + Interface::copy_matrix(X_ref,X,_size); + } + + inline void calculate( void ) { + Interface::cholesky(X,C,_size); + } + + void check_result( void ){ + // calculation check + Interface::matrix_to_stl(C,resu_stl); + +// STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size); +// +// typename Interface::real_type error= +// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl); +// +// if (error>1.e-6){ +// INFOS("WRONG CALCULATION...residual=" << error); +// exit(0); +// } + + } + +private : + + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix C_stl; + typename Interface::stl_matrix resu_stl; + + typename Interface::gene_matrix X_ref; + typename Interface::gene_matrix X; + typename Interface::gene_matrix C; + + int _size; + int _cost; +}; + +#endif diff --git a/bench/btl/actions/action_matrix_vector_product.hh b/bench/btl/actions/action_matrix_vector_product.hh index 3a172f292..202bc1a85 100644 --- a/bench/btl/actions/action_matrix_vector_product.hh +++ b/bench/btl/actions/action_matrix_vector_product.hh @@ -49,11 +49,10 @@ public : // generic matrix and vector initialization Interface::matrix_from_stl(A_ref,A_stl); - Interface::vector_from_stl(B_ref,B_stl); - Interface::vector_from_stl(X_ref,X_stl); - Interface::matrix_from_stl(A,A_stl); + Interface::vector_from_stl(B_ref,B_stl); Interface::vector_from_stl(B,B_stl); + Interface::vector_from_stl(X_ref,X_stl); Interface::vector_from_stl(X,X_stl); } diff --git a/bench/btl/actions/action_trisolve.hh b/bench/btl/actions/action_trisolve.hh new file mode 100644 index 000000000..f97f7dbc9 --- /dev/null +++ b/bench/btl/actions/action_trisolve.hh @@ -0,0 +1,136 @@ +//===================================================== +// File : action_trisolve.hh +// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef ACTION_TRISOLVE +#define ACTION_TRISOLVE +#include "utilities.h" +#include "STL_interface.hh" +#include <string> +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template<class Interface> +class Action_trisolve { + +public : + + // Ctor + + Action_trisolve( int size ):_size(size) + { + MESSAGE("Action_trisolve Ctor"); + + // STL vector initialization + init_matrix<pseudo_random>(L_stl,_size); + init_vector<pseudo_random>(B_stl,_size); + init_vector<null_function>(X_stl,_size); + for (int j=0; j<_size; ++j) + { + for (int i=0; i<j; ++i) + L_stl[j][i] = 0; + L_stl[j][j] += 3; + } + + init_vector<null_function>(resu_stl,_size); + + // generic matrix and vector initialization + Interface::matrix_from_stl(L,L_stl); + Interface::vector_from_stl(X,X_stl); + Interface::vector_from_stl(B,B_stl); + + _cost = 0; + for (int j=0; j<_size; ++j) + { + _cost += 2*j + 1; + } + } + + // invalidate copy ctor + + Action_trisolve( const Action_trisolve & ) + { + INFOS("illegal call to Action_trisolve Copy Ctor"); + exit(1); + } + + // Dtor + + ~Action_trisolve( void ){ + + MESSAGE("Action_trisolve Dtor"); + + // deallocation + Interface::free_matrix(L,_size); + Interface::free_vector(B); + Interface::free_vector(X); + } + + // action name + + static inline std::string name( void ) + { + return "trisolve_"+Interface::name(); + } + + double nb_op_base( void ){ + return _cost; + } + + inline void initialize( void ){ + //Interface::copy_vector(X_ref,X,_size); + } + + inline void calculate( void ) { + Interface::trisolve_lower(L,B,X,_size); + } + + void check_result( void ){ + // calculation check + Interface::vector_to_stl(X,resu_stl); + + STL_interface<typename Interface::real_type>::trisolve_lower(L_stl,B_stl,X_stl,_size); + + typename Interface::real_type error= + STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl); + + if (error>1.e-4){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(2); + } //else INFOS("CALCULATION OK...residual=" << error); + + } + +private : + + typename Interface::stl_matrix L_stl; + typename Interface::stl_vector X_stl; + typename Interface::stl_vector B_stl; + typename Interface::stl_vector resu_stl; + + typename Interface::gene_matrix L; + typename Interface::gene_vector X; + typename Interface::gene_vector B; + + int _size; + int _cost; +}; + +#endif diff --git a/bench/btl/actions/basic_actions.hh b/bench/btl/actions/basic_actions.hh new file mode 100644 index 000000000..fd2e74950 --- /dev/null +++ b/bench/btl/actions/basic_actions.hh @@ -0,0 +1,15 @@ + +#include "action_axpy.hh" +#include "action_axpby.hh" + +#include "action_matrix_vector_product.hh" +#include "action_atv_product.hh" + +#include "action_matrix_matrix_product.hh" +#include "action_ata_product.hh" +#include "action_aat_product.hh" + +#include "action_trisolve.hh" + +// #include "action_lu_solve.hh" + diff --git a/bench/btl/cmake/FindATLAS.cmake b/bench/btl/cmake/FindATLAS.cmake new file mode 100644 index 000000000..1a7d7e85a --- /dev/null +++ b/bench/btl/cmake/FindATLAS.cmake @@ -0,0 +1,35 @@ +# include(FindLibraryWithDebug) + +if (ATLAS_INCLUDES AND ATLAS_LIBRARIES) + set(ATLAS_FIND_QUIETLY TRUE) +endif (ATLAS_INCLUDES AND ATLAS_LIBRARIES) + +find_path(ATLAS_INCLUDES + NAMES + cblas.h + PATHS + $ENV{ATLASDIR}/include + ${INCLUDE_INSTALL_DIR} +) + +find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) + +find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) + +find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) + +find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) + +if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) +set(ATLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS} ${ATLAS_LAPACK} ${ATLAS_F77BLAS}) +endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ATLAS DEFAULT_MSG + ATLAS_INCLUDES ATLAS_LIBRARIES) + +mark_as_advanced(ATLAS_INCLUDES ATLAS_LIBRARIES) diff --git a/bench/btl/data/CMakeLists.txt b/bench/btl/data/CMakeLists.txt index decfc45e9..9b7e627d3 100644 --- a/bench/btl/data/CMakeLists.txt +++ b/bench/btl/data/CMakeLists.txt @@ -1,7 +1,7 @@ ADD_CUSTOM_TARGET(copy_scripts) -SET(script_files go_mean aat.hh ata.hh axpy.hh order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh ) +SET(script_files go_mean aat.hh ata.hh axpy.hh axpby.hh cholesky.hh trisolve.hh order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh ) FOREACH(script_file ${script_files}) ADD_CUSTOM_COMMAND( diff --git a/bench/btl/data/axpby.hh b/bench/btl/data/axpby.hh new file mode 100644 index 000000000..744e3063f --- /dev/null +++ b/bench/btl/data/axpby.hh @@ -0,0 +1,3 @@ +set title "{/*1.5 Y = alpha * X + beta * Y}" 0.000000,0.000000 +set xlabel "vector size" 0.000000,0.000000 +set xrange [1:1000000] diff --git a/bench/btl/data/cholesky.hh b/bench/btl/data/cholesky.hh new file mode 100644 index 000000000..78336b4ee --- /dev/null +++ b/bench/btl/data/cholesky.hh @@ -0,0 +1,3 @@ +set title "{/*1.5 Cholesky decomposition}" 0.000000,0.000000 +set xlabel "matrix size" 0.000000,0.000000 +set xrange [6:1000] diff --git a/bench/btl/data/go_mean b/bench/btl/data/go_mean index 422b357a6..05835a8c4 100755 --- a/bench/btl/data/go_mean +++ b/bench/btl/data/go_mean @@ -17,11 +17,13 @@ echo '<ul>'\ '</p>' >> $webpagefilename source mk_mean_script.sh axpy $1 11 2500 100000 250000 > $1/axpy.html -source mk_mean_script.sh matrix_vector $1 11 50 300 500 > $1/matrix_vector.html -source mk_mean_script.sh atv $1 11 50 300 500 > $1/atv.html -# source mk_mean_script.sh matrix_matrix $1 11 100 300 500 > $1/matrix_matrix.html -# source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html -# source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html +source mk_mean_script.sh matrix_vector $1 11 50 300 1000 > $1/matrix_vector.html +source mk_mean_script.sh atv $1 11 50 300 1000 > $1/atv.html +source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 > $1/matrix_matrix.html +source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html +source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html +source mk_mean_script.sh trisolve $1 11 100 300 1000 > $1/trisolve.html +source mk_mean_script.sh cholesky $1 11 100 300 1000 > $1/cholesky.html ## compile the web page ## diff --git a/bench/btl/data/mean.cxx b/bench/btl/data/mean.cxx index 96d37dc4a..3e1a82d22 100644 --- a/bench/btl/data/mean.cxx +++ b/bench/btl/data/mean.cxx @@ -23,11 +23,11 @@ #include <iostream> #include <fstream> #include "bench_parameter.hh" +#include "utils/xy_file.hh" #include <set> using namespace std; -void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops); double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max); class Lib_Mean{ @@ -152,29 +152,6 @@ int main( int argc , char *argv[] ) } -void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){ - - ifstream input_file (filename.c_str(),ios::in) ; - - if (!input_file){ - INFOS("!!! Error opening "<<filename); - exit(0); - } - - int nb_point=0; - int size=0; - double mflops=0; - - while (input_file >> size >> mflops ){ - nb_point++; - tab_sizes.push_back(size); - tab_mflops.push_back(mflops); - } - SCRUTE(nb_point); - - input_file.close(); -} - double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){ int size=tab_sizes.size(); diff --git a/bench/btl/data/trisolve.hh b/bench/btl/data/trisolve.hh new file mode 100644 index 000000000..6c3000f2a --- /dev/null +++ b/bench/btl/data/trisolve.hh @@ -0,0 +1,3 @@ +set title "{/*1.5 triangular solver (X = inv(L) * X)}" 0.000000,0.000000 +set xlabel "matrix-vector size" 0.000000,0.000000 +set xrange [6:1000] diff --git a/bench/btl/generic_bench/bench.hh b/bench/btl/generic_bench/bench.hh index cace2695d..bb3ca6667 100644 --- a/bench/btl/generic_bench/bench.hh +++ b/bench/btl/generic_bench/bench.hh @@ -25,14 +25,16 @@ #include <iostream> #include "utilities.h" #include "size_lin_log.hh" -#include "dump_file_x_y.hh" +#include "xy_file.hh" #include <vector> #include <string> #include "timers/portable_perf_analyzer.hh" -//#include "timers/mixed_perf_analyzer.hh" -//#include "timers/x86_perf_analyzer.hh" -//#include "timers/STL_perf_analyzer.hh" - +// #include "timers/mixed_perf_analyzer.hh" +// #include "timers/x86_perf_analyzer.hh" +// #include "timers/STL_perf_analyzer.hh" +#ifdef HAVE_MKL +extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int); +#endif using namespace std; template <template<class> class Perf_Analyzer, class Action> @@ -41,8 +43,6 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ) if (BtlConfig::skipAction(Action::name())) return; - BTL_DISABLE_SSE_EXCEPTIONS(); - string filename="bench_"+Action::name()+".dat"; INFOS("starting " <<filename); @@ -64,14 +64,71 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ) { //INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")"); std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush; + + BTL_DISABLE_SSE_EXCEPTIONS(); + #ifdef HAVE_MKL + { + float dummy; + cblas_saxpy(1,0,&dummy,1,&dummy,1); + } + #endif + tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]); std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl; } -// std::cout << "\n"; - // dump the result in a file : + if (!BtlConfig::Instance.overwriteResults) + { + std::vector<int> oldSizes; + std::vector<double> oldFlops; + if (read_xy_file(filename, oldSizes, oldFlops, true)) + { + // merge the two data + std::vector<int> newSizes; + std::vector<double> newFlops; + int i=0; + int j=0; + while (i<tab_sizes.size() && j<oldSizes.size()) + { + if (tab_sizes[i] == oldSizes[j]) + { + newSizes.push_back(tab_sizes[i]); + newFlops.push_back(std::max(tab_mflops[i], oldFlops[j])); + ++i; + ++j; + } + else if (tab_sizes[i] < oldSizes[j]) + { + newSizes.push_back(tab_sizes[i]); + newFlops.push_back(tab_mflops[i]); + ++i; + } + else + { + newSizes.push_back(oldSizes[j]); + newFlops.push_back(oldFlops[j]); + ++j; + } + } + while (i<tab_sizes.size()) + { + newSizes.push_back(tab_sizes[i]); + newFlops.push_back(tab_mflops[i]); + ++i; + } + while (j<oldSizes.size()) + { + newSizes.push_back(oldSizes[j]); + newFlops.push_back(oldFlops[j]); + ++j; + } + tab_mflops = newFlops; + tab_sizes = newSizes; + } + } - dump_file_x_y(tab_sizes,tab_mflops,filename); + // dump the result in a file : + dump_xy_file(tab_sizes,tab_mflops,filename); } @@ -87,8 +144,8 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ){ // Only for small problem size. Otherwize it will be too long - //bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point); - //bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point); +// bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point); +// bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point); } diff --git a/bench/btl/generic_bench/bench_parameter.hh b/bench/btl/generic_bench/bench_parameter.hh index 5714cb3c2..62ba20e4c 100644 --- a/bench/btl/generic_bench/bench_parameter.hh +++ b/bench/btl/generic_bench/bench_parameter.hh @@ -23,7 +23,7 @@ // minimal time for each measurement #define REAL_TYPE float // minimal time for each measurement -#define MIN_TIME 0.5 +#define MIN_TIME 1.0 // nb of point on bench curves #define NB_POINT 100 // min vector size for axpy bench diff --git a/bench/btl/generic_bench/btl.hh b/bench/btl/generic_bench/btl.hh index 784702432..6d6e048b3 100644 --- a/bench/btl/generic_bench/btl.hh +++ b/bench/btl/generic_bench/btl.hh @@ -48,7 +48,7 @@ : : [aux] "m" (aux)); \ } #else -#define DISABLE_SSE_EXCEPTIONS() +#define BTL_DISABLE_SSE_EXCEPTIONS() #endif /** Enhanced std::string @@ -163,7 +163,7 @@ class BtlConfig { public: BtlConfig() - : m_runSingleAction(false) + : overwriteResults(false) { char * _config; _config = getenv ("BTL_CONFIG"); @@ -179,33 +179,38 @@ public: std::cerr << "error processing option: " << config[i] << "\n"; exit(2); } - Instance.m_runSingleAction = true; - Instance.m_singleActionName = config[i+1]; + Instance.m_selectedActionNames = config[i+1].split(":"); i += 1; } + else if (config[i].beginsWith("--overwrite")) + { + Instance.overwriteResults = true; + } } } BTL_DISABLE_SSE_EXCEPTIONS(); } - BTL_DONT_INLINE static bool skipAction(const std::string& name) + BTL_DONT_INLINE static bool skipAction(const std::string& _name) { - if (Instance.m_runSingleAction) - { - return !BtlString(name).contains(Instance.m_singleActionName); - } + if (Instance.m_selectedActionNames.empty()) + return false; - return false; + BtlString name(_name); + for (int i=0; i<Instance.m_selectedActionNames.size(); ++i) + if (name.contains(Instance.m_selectedActionNames[i])) + return false; + + return true; } + static BtlConfig Instance; + bool overwriteResults; protected: - bool m_runSingleAction; - BtlString m_singleActionName; - - static BtlConfig Instance; + std::vector<BtlString> m_selectedActionNames; }; #define BTL_MAIN \ diff --git a/bench/btl/generic_bench/init/init_vector.hh b/bench/btl/generic_bench/init/init_vector.hh index faf14f953..efaf0c92e 100644 --- a/bench/btl/generic_bench/init/init_vector.hh +++ b/bench/btl/generic_bench/init/init_vector.hh @@ -1,14 +1,14 @@ //===================================================== // File : init_vector.hh -// Author : L. Plagne <laurent.plagne@edf.fr)> +// Author : L. Plagne <laurent.plagne@edf.fr)> // Copyright (C) EDF R&D, lun sep 30 14:23:18 CEST 2002 //===================================================== -// +// // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 2 // of the License, or (at your option) any later version. -// +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -16,7 +16,7 @@ // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// +// #ifndef INIT_VECTOR_HH #define INIT_VECTOR_HH diff --git a/bench/btl/generic_bench/static/bench_static.hh b/bench/btl/generic_bench/static/bench_static.hh index cdb645fc2..730590c67 100644 --- a/bench/btl/generic_bench/static/bench_static.hh +++ b/bench/btl/generic_bench/static/bench_static.hh @@ -24,7 +24,7 @@ #include "bench_parameter.hh" #include <iostream> #include "utilities.h" -#include "dump_file_x_y.hh" +#include "xy_file.hh" #include "static/static_size_generator.hh" #include "timers/portable_perf_analyzer.hh" #include "timers/mixed_perf_analyzer.hh" diff --git a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh index d716154fd..d0fe95ce0 100644 --- a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh +++ b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh @@ -38,16 +38,16 @@ public: MESSAGE("Portable_Perf_Analyzer Dtor"); }; - - BTL_DONT_INLINE double eval_mflops(int size) { Action action(size); - - double time_action = time_calculate(action); + double time_action = 0; + action.initialize(); + time_action = time_calculate(action); while (time_action < MIN_TIME) { + //Action action(size); _nb_calc *= 2; action.initialize(); time_action = time_calculate(action); @@ -56,8 +56,10 @@ public: // optimize for (int i=1; i<NB_TRIES; ++i) { - action.initialize(); - time_action = std::min(time_action, time_calculate(action)); + Action _action(size); + std::cout << " " << _action.nb_op_base()*_nb_calc/(time_action*1e6) << " "; + _action.initialize(); + time_action = std::min(time_action, time_calculate(_action)); } time_action = time_action / (double(_nb_calc)); @@ -66,13 +68,13 @@ public: action.initialize(); action.calculate(); action.check_result(); - return action.nb_op_base()/(time_action*1000000.0); } BTL_DONT_INLINE double time_calculate(Action & action) { // time measurement + action.calculate(); _chronos.start(); for (int ii=0;ii<_nb_calc;ii++) { diff --git a/bench/btl/generic_bench/timers/portable_timer.hh b/bench/btl/generic_bench/timers/portable_timer.hh index 7d4059464..de50394f9 100755 --- a/bench/btl/generic_bench/timers/portable_timer.hh +++ b/bench/btl/generic_bench/timers/portable_timer.hh @@ -39,6 +39,38 @@ // A timer object measures CPU time. +// class Portable_Timer +// { +// public: +// +// Portable_Timer( void ) +// { +// } +// +// +// void start() { m_val = getTime(); } +// +// void stop() { m_val = getTime() - m_val; } +// +// double elapsed() { return m_val; } +// +// double user_time() { return elapsed(); } +// +// +// private: +// +// static inline double getTime(void) +// { +// struct timeval tv; +// struct timezone tz; +// gettimeofday(&tv, &tz); +// return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec; +// } +// +// double m_val; +// +// }; // Portable_Timer + class Portable_Timer { public: @@ -46,42 +78,42 @@ class Portable_Timer Portable_Timer( void ):_utime_sec_start(-1), _utime_usec_start(-1), _utime_sec_stop(-1), - _utime_usec_stop(-1) + _utime_usec_stop(-1)/*, + m_prev_cs(-1)*/ { } void start() { - int status=getrusage(RUSAGE_SELF, &resourcesUsage) ; - - _start_time = std::clock(); - +// _start_time = std::clock(); _utime_sec_start = resourcesUsage.ru_utime.tv_sec ; _utime_usec_start = resourcesUsage.ru_utime.tv_usec ; +// m_prev_cs = resourcesUsage.ru_nivcsw; } void stop() { - int status=getrusage(RUSAGE_SELF, &resourcesUsage) ; - - _stop_time = std::clock(); - +// _stop_time = std::clock(); _utime_sec_stop = resourcesUsage.ru_utime.tv_sec ; _utime_usec_stop = resourcesUsage.ru_utime.tv_usec ; +// m_prev_cs = resourcesUsage.ru_nivcsw - m_prev_cs; +// std::cerr << resourcesUsage.ru_nvcsw << " + " << resourcesUsage.ru_nivcsw << "\n"; + } double elapsed() { - return double(_stop_time - _start_time) / CLOCKS_PER_SEC; + return user_time();//double(_stop_time - _start_time) / CLOCKS_PER_SEC; } double user_time() { +// std::cout << m_prev_cs << "\n"; long tot_utime_sec=_utime_sec_stop-_utime_sec_start; long tot_utime_usec=_utime_usec_stop-_utime_usec_start; return double(tot_utime_sec)+ double(tot_utime_usec)/double(USEC_IN_SEC) ; @@ -98,6 +130,8 @@ private: long _utime_sec_stop ; long _utime_usec_stop ; +// long m_prev_cs; + std::clock_t _start_time; std::clock_t _stop_time; diff --git a/bench/btl/generic_bench/utils/dump_file_x_y.hh b/bench/btl/generic_bench/utils/xy_file.hh index 6b99dcb10..4571bed8f 100644 --- a/bench/btl/generic_bench/utils/dump_file_x_y.hh +++ b/bench/btl/generic_bench/utils/xy_file.hh @@ -17,10 +17,41 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // -#ifndef DUMP_FILE_X_Y_HH -#define DUMP_FILE_X_Y_HH +#ifndef XY_FILE_HH +#define XY_FILE_HH #include <fstream> +#include <iostream> #include <string> +#include <vector> +using namespace std; + +bool read_xy_file(const std::string & filename, std::vector<int> & tab_sizes, + std::vector<double> & tab_mflops, bool quiet = false) +{ + + std::ifstream input_file (filename.c_str(),std::ios::in); + + if (!input_file){ + if (!quiet) { + INFOS("!!! Error opening "<<filename); + } + return false; + } + + int nb_point=0; + int size=0; + double mflops=0; + + while (input_file >> size >> mflops ){ + nb_point++; + tab_sizes.push_back(size); + tab_mflops.push_back(mflops); + } + SCRUTE(nb_point); + + input_file.close(); + return true; +} // The Vector class must satisfy the following part of STL vector concept : // resize() method @@ -30,16 +61,13 @@ using namespace std; template<class Vector_A, class Vector_B> -void dump_file_x_y(const Vector_A & X, const Vector_B & Y, const std::string & filename){ +void dump_xy_file(const Vector_A & X, const Vector_B & Y, const std::string & filename){ ofstream outfile (filename.c_str(),ios::out) ; int size=X.size(); - for (int i=0;i<size;i++){ - - outfile << X[i] << " " << Y[i] << endl ; - - } + for (int i=0;i<size;i++) + outfile << X[i] << " " << Y[i] << endl; outfile.close(); } diff --git a/bench/btl/libs/C_BLAS/CMakeLists.txt b/bench/btl/libs/C_BLAS/CMakeLists.txt index 73e5cc46b..f72e9caea 100644 --- a/bench/btl/libs/C_BLAS/CMakeLists.txt +++ b/bench/btl/libs/C_BLAS/CMakeLists.txt @@ -1,13 +1,13 @@ -find_package(CBLAS) -if (CBLAS_FOUND) - include_directories(${CBLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77) - btl_add_bench(btl_cblas main.cpp) - if(BUILD_btl_cblas) - target_link_libraries(btl_cblas ${CBLAS_LIBRARIES}) - set_target_properties(btl_cblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS") - endif(BUILD_btl_cblas) -endif (CBLAS_FOUND) +find_package(ATLAS) +if (ATLAS_FOUND) + include_directories(${ATLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77) + btl_add_bench(btl_atlas main.cpp) + if(BUILD_btl_atlas) + target_link_libraries(btl_atlas ${ATLAS_LIBRARIES}) + set_target_properties(btl_atlas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1") + endif(BUILD_btl_atlas) +endif (ATLAS_FOUND) find_package(MKL) if (MKL_FOUND) @@ -15,6 +15,6 @@ if (MKL_FOUND) btl_add_bench(btl_mkl main.cpp) if(BUILD_btl_mkl) target_link_libraries(btl_mkl ${MKL_LIBRARIES}) - set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL") + set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1") endif(BUILD_btl_mkl) endif (MKL_FOUND) diff --git a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh index eadbf0dd2..49ae90f3d 100644 --- a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh +++ b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh @@ -25,6 +25,11 @@ extern "C" { #include "cblas.h" +#ifdef HAS_LAPACK + // Cholesky Factorization + void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info); + void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info); +#endif } #define MAKE_STRING2(S) #S @@ -53,31 +58,31 @@ public : cblas_dgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1); } - static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) - { + static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); } - static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) - { + static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ cblas_dgemm(CblasColMajor,CblasTrans,CblasTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); } - static inline void ata_product(gene_matrix & A, gene_matrix & X, int N) - { + static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); } - static inline void aat_product(gene_matrix & A, gene_matrix & X, int N) - { + static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){ cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); } - static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N) - { + static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){ cblas_daxpy(N,coef,X,1,Y,1); } + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + cblas_dscal(N,b,Y,1); + cblas_daxpy(N,a,X,1,Y,1); + } + }; template<> @@ -90,41 +95,62 @@ public : return MAKE_STRING(CBLASNAME); } - static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) - { + static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ cblas_sgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1); } - static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) - { + static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ cblas_sgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1); } - static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) - { + static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); } - static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) - { + static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); } - static inline void ata_product(gene_matrix & A, gene_matrix & X, int N) - { + static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ cblas_sgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); } - static inline void aat_product(gene_matrix & A, gene_matrix & X, int N) - { + static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){ cblas_sgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); } - static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N) - { + static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N){ cblas_saxpy(N,coef,X,1,Y,1); } + static inline void axpby(float a, const gene_vector & X, float b, gene_vector & Y, int N){ + cblas_sscal(N,b,Y,1); + cblas_saxpy(N,a,X,1,Y,1); + } + + #ifdef HAS_LAPACK + static inline void cholesky(const gene_vector & X, gene_vector & C, int N){ + cblas_scopy(N*N, X, 1, C, 1); + char uplo = 'L'; + int info = 0; + spotrf_(&uplo, &N, C, &N, &info); +// float tmp[N]; +// for (int j = 1; j < N; ++j) +// { +// int endSize = N-j-1; +// if (endSize>0) { + //cblas_sgemv(CblasColMajor, CblasNoTrans, N-j-1, j, ATL_rnone, A+j+1, lda, A+j, lda, ATL_rone, Ac+j+1, 1); +// cblas_sgemv(CblasColMajor, CblasNoTrans,endSize,j, 1.0, &(C[j+1]),N, &(C[j]),N, 0.0, &(C[j+1+N*j]),1); +// } +// } + } + #endif + + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ + cblas_scopy(N, B, 1, X, 1); + cblas_strsv(CblasColMajor, CblasLower, CblasNoTrans, CblasNonUnit, N, L, N, X, 1); + } + }; diff --git a/bench/btl/libs/C_BLAS/main.cpp b/bench/btl/libs/C_BLAS/main.cpp index bef5d30be..3e38d6985 100644 --- a/bench/btl/libs/C_BLAS/main.cpp +++ b/bench/btl/libs/C_BLAS/main.cpp @@ -20,13 +20,11 @@ #include "utilities.h" #include "C_BLAS_interface.hh" #include "bench.hh" -#include "action_matrix_vector_product.hh" -#include "action_matrix_matrix_product.hh" -#include "action_atv_product.hh" -#include "action_axpy.hh" -#include "action_lu_solve.hh" -#include "action_ata_product.hh" -#include "action_aat_product.hh" +#include "basic_actions.hh" + +#ifdef HAS_LAPACK +#include "action_cholesky.hh" +#endif BTL_MAIN; @@ -34,17 +32,21 @@ int main() { bench<Action_axpy<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench<Action_axpby<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); - bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); - bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); - bench<Action_aat_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_trisolve<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + + #ifdef HAS_LAPACK + bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + #endif + //bench<Action_lu_solve<C_BLAS_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT); return 0; diff --git a/bench/btl/libs/STL/STL_interface.hh b/bench/btl/libs/STL/STL_interface.hh index 5b1a384af..d7ef9a215 100644 --- a/bench/btl/libs/STL/STL_interface.hh +++ b/bench/btl/libs/STL/STL_interface.hh @@ -146,6 +146,22 @@ public : Y[i]+=coef*X[i]; } + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + for (int i=0;i<N;i++) + Y[i] = a*X[i] + b*Y[i]; + } + + static inline void trisolve_lower(const gene_matrix & L, const gene_vector & B, gene_vector & X, int N){ + copy_vector(B,X,N); + for(int i=0; i<N; ++i) + { + X[i] /= L[i][i]; + real tmp = X[i]; + for (int j=i+1; j<N; ++j) + X[j] -= tmp * L[i][j]; + } + } + static inline real norm_diff(const stl_vector & A, const stl_vector & B) { int N=A.size(); diff --git a/bench/btl/libs/STL/main.cpp b/bench/btl/libs/STL/main.cpp index 4cb846633..35f186402 100644 --- a/bench/btl/libs/STL/main.cpp +++ b/bench/btl/libs/STL/main.cpp @@ -20,19 +20,14 @@ #include "utilities.h" #include "STL_interface.hh" #include "bench.hh" -#include "action_matrix_vector_product.hh" -#include "action_matrix_matrix_product.hh" -#include "action_axpy.hh" -#include "action_lu_solve.hh" -#include "action_ata_product.hh" -#include "action_aat_product.hh" -#include "action_atv_product.hh" +#include "basic_actions.hh" BTL_MAIN; int main() { bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); diff --git a/bench/btl/libs/eigen2/CMakeLists.txt b/bench/btl/libs/eigen2/CMakeLists.txt index ba55ef5f0..f452dc901 100644 --- a/bench/btl/libs/eigen2/CMakeLists.txt +++ b/bench/btl/libs/eigen2/CMakeLists.txt @@ -3,13 +3,28 @@ find_package(Eigen2) if (EIGEN2_FOUND) include_directories(${EIGEN2_INCLUDE_DIR}) - btl_add_bench(btl_eigen2 main.cpp) + btl_add_bench(btl_eigen2_linear main_linear.cpp) + btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp) + btl_add_bench(btl_eigen2_matmat main_matmat.cpp) + btl_add_bench(btl_eigen2_adv main_adv.cpp) IF(NOT BTL_NOVEC) - btl_add_bench(btl_eigen2_novec main.cpp) - if(BUILD_btl_eigen2_novec) - set_target_properties(btl_eigen2_novec PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE") - endif(BUILD_btl_eigen2_novec) + btl_add_bench(btl_eigen2_novec_linear main_linear.cpp) + btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp) + btl_add_bench(btl_eigen2_novec_matmat main_matmat.cpp) + btl_add_bench(btl_eigen2_novec_adv main_adv.cpp) + if(BUILD_btl_eigen2_novec_linear) + set_target_properties(btl_eigen2_novec_linear PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE") + endif(BUILD_btl_eigen2_novec_linear) + if(BUILD_btl_eigen2_novec_vecmat) + set_target_properties(btl_eigen2_novec_vecmat PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE") + endif(BUILD_btl_eigen2_novec_vecmat) + if(BUILD_btl_eigen2_novec_matmat) + set_target_properties(btl_eigen2_novec_matmat PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE") + endif(BUILD_btl_eigen2_novec_matmat) + if(BUILD_btl_eigen2_novec_adv) + set_target_properties(btl_eigen2_novec_adv PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE") + endif(BUILD_btl_eigen2_novec_adv) ENDIF(NOT BTL_NOVEC) btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF) diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh index 4ce4af165..13c7058ed 100644 --- a/bench/btl/libs/eigen2/eigen2_interface.hh +++ b/bench/btl/libs/eigen2/eigen2_interface.hh @@ -19,6 +19,7 @@ #define EIGEN2_INTERFACE_HH #include <Eigen/Core> +#include <Eigen/Cholesky> #include <vector> #include "btl.hh" @@ -107,17 +108,21 @@ public : } static inline void matrix_vector_product(const gene_matrix & __restrict__ A, const gene_vector & __restrict__ B, gene_vector & __restrict__ X, int N){ - X = (A*B).lazy(); + X = (A*B)/*.lazy()*/; } static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ - X = (A.transpose()*B).lazy(); + X = (A.transpose()*B)/*.lazy()*/; } - static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){ + static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){ Y += coef * X; } + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + Y = a*X + b*Y; + } + static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ cible = source; } @@ -126,6 +131,16 @@ public : cible = source; } + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int N){ + X = L.template marked<Lower>().inverseProduct(B); + } + + static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){ +// C = X; +// Cholesky<gene_matrix>::computeInPlace(C); + C = X.cholesky().matrixL(); + } + }; #endif diff --git a/bench/btl/libs/eigen2/main_adv.cpp b/bench/btl/libs/eigen2/main_adv.cpp new file mode 100644 index 000000000..db38b67f4 --- /dev/null +++ b/bench/btl/libs/eigen2/main_adv.cpp @@ -0,0 +1,34 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "eigen2_interface.hh" +#include "bench.hh" +#include "action_trisolve.hh" +#include "action_cholesky.hh" + +BTL_MAIN; + +int main() +{ + bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/bench/btl/libs/eigen2/main_linear.cpp b/bench/btl/libs/eigen2/main_linear.cpp new file mode 100644 index 000000000..e79927b0f --- /dev/null +++ b/bench/btl/libs/eigen2/main_linear.cpp @@ -0,0 +1,34 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "eigen2_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + + bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench<Action_axpby<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + return 0; +} + + diff --git a/bench/btl/libs/eigen2/main_matmat.cpp b/bench/btl/libs/eigen2/main_matmat.cpp new file mode 100644 index 000000000..bdebc0832 --- /dev/null +++ b/bench/btl/libs/eigen2/main_matmat.cpp @@ -0,0 +1,34 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "eigen2_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/bench/btl/libs/eigen2/main.cpp b/bench/btl/libs/eigen2/main_vecmat.cpp index 086dd75c8..eadc9a885 100644 --- a/bench/btl/libs/eigen2/main.cpp +++ b/bench/btl/libs/eigen2/main_vecmat.cpp @@ -18,29 +18,15 @@ #include "utilities.h" #include "eigen2_interface.hh" #include "bench.hh" -#include "action_matrix_vector_product.hh" -#include "action_matrix_matrix_product.hh" -#include "action_axpy.hh" -#include "action_lu_solve.hh" -#include "action_ata_product.hh" -#include "action_aat_product.hh" -#include "action_atv_product.hh" +#include "basic_actions.hh" BTL_MAIN; int main() { - bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); - - bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); -// bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); -// bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); -// bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); - //bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT); - return 0; } diff --git a/bench/btl/libs/gmm/gmm_interface.hh b/bench/btl/libs/gmm/gmm_interface.hh index 6a81fe969..7ef41c0cf 100644 --- a/bench/btl/libs/gmm/gmm_interface.hh +++ b/bench/btl/libs/gmm/gmm_interface.hh @@ -106,6 +106,10 @@ public : gmm::add(gmm::scaled(X,coef), Y); } + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + gmm::add(gmm::scaled(X,a), gmm::scaled(Y,b), Y); + } + static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ gmm::copy(source,cible); } @@ -114,6 +118,12 @@ public : gmm::copy(source,cible); } + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ + gmm::copy(B,X); + gmm::lower_tri_solve(L, X, false); + } + + }; #endif diff --git a/bench/btl/libs/gmm/main.cpp b/bench/btl/libs/gmm/main.cpp index a05fd1b46..26fdf76bd 100644 --- a/bench/btl/libs/gmm/main.cpp +++ b/bench/btl/libs/gmm/main.cpp @@ -18,13 +18,7 @@ #include "utilities.h" #include "gmm_interface.hh" #include "bench.hh" -#include "action_matrix_vector_product.hh" -#include "action_matrix_matrix_product.hh" -#include "action_axpy.hh" -#include "action_lu_solve.hh" -#include "action_ata_product.hh" -#include "action_aat_product.hh" -#include "action_atv_product.hh" +#include "basic_actions.hh" BTL_MAIN; @@ -32,12 +26,16 @@ int main() { bench<Action_axpy<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench<Action_axpby<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench<Action_matrix_vector_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); + bench<Action_matrix_matrix_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_ata_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_aat_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_trisolve<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); //bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT); return 0; diff --git a/bench/btl/libs/hand_vec/hand_vec_interface.hh b/bench/btl/libs/hand_vec/hand_vec_interface.hh index 538c03ba6..fb2e61c75 100755 --- a/bench/btl/libs/hand_vec/hand_vec_interface.hh +++ b/bench/btl/libs/hand_vec/hand_vec_interface.hh @@ -68,96 +68,152 @@ public : #endif } -static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N) + static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N) { asm("#begin matrix_vector_product"); int AN = (N/PacketSize)*PacketSize; - int ANP = (AN/(4*PacketSize))*4*PacketSize; + int ANP = (AN/(2*PacketSize))*2*PacketSize; int bound = (N/4)*4; for (int i=0;i<N;i++) X[i] = 0; for (int i=0;i<bound;i+=4) { - real tmp0 = B[i]; - Packet ptmp0 = ei_pset1(tmp0); - real tmp1 = B[i+1]; - Packet ptmp1 = ei_pset1(tmp1); - real tmp2 = B[i+2]; - Packet ptmp2 = ei_pset1(tmp2); - real tmp3 = B[i+3]; - Packet ptmp3 = ei_pset1(tmp3); - int iN0 = i*N; - int iN1 = (i+1)*N; - int iN2 = (i+2)*N; - int iN3 = (i+3)*N; + register real* __restrict__ A0 = A + i*N; + register real* __restrict__ A1 = A + (i+1)*N; + register real* __restrict__ A2 = A + (i+2)*N; + register real* __restrict__ A3 = A + (i+3)*N; + + Packet ptmp0 = ei_pset1(B[i]); + Packet ptmp1 = ei_pset1(B[i+1]); + Packet ptmp2 = ei_pset1(B[i+2]); + Packet ptmp3 = ei_pset1(B[i+3]); +// register Packet ptmp0, ptmp1, ptmp2, ptmp3; +// asm( +// +// "movss (%[B],%[j],4), %[ptmp0] \n\t" +// "shufps $0,%[ptmp0],%[ptmp0] \n\t" +// "movss 4(%[B],%[j],4), %[ptmp1] \n\t" +// "shufps $0,%[ptmp1],%[ptmp1] \n\t" +// "movss 8(%[B],%[j],4), %[ptmp2] \n\t" +// "shufps $0,%[ptmp2],%[ptmp2] \n\t" +// "movss 12(%[B],%[j],4), %[ptmp3] \n\t" +// "shufps $0,%[ptmp3],%[ptmp3] \n\t" +// : [ptmp0] "=x" (ptmp0), +// [ptmp1] "=x" (ptmp1), +// [ptmp2] "=x" (ptmp2), +// [ptmp3] "=x" (ptmp3) +// : [B] "r" (B), +// [j] "r" (size_t(i)) +// : ); + if (AN>0) { -// int aligned0 = (iN0 % PacketSize); - int aligned1 = (iN1 % PacketSize); - - if (aligned1==0) - { - for (int j = 0;j<AN;j+=PacketSize) - { - ei_pstore(&X[j], - ei_padd(ei_pload(&X[j]), - ei_padd( - ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_pload(&A[j+iN1]))), - ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_pload(&A[j+iN3]))) ))); - } - } - else if (aligned1==2) - { - for (int j = 0;j<AN;j+=PacketSize) - { - ei_pstore(&X[j], - ei_padd(ei_pload(&X[j]), - ei_padd( - ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))), - ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) ))); - } - } - else - { - for (int j = 0;j<ANP;j+=4*PacketSize) +// for (size_t j = 0;j<ANP;j+=8) +// { +// asm( +// +// "movaps (%[A0],%[j],4), %%xmm8 \n\t" +// "movaps 16(%[A0],%[j],4), %%xmm12 \n\t" +// "movups (%[A3],%[j],4), %%xmm11 \n\t" +// "movups 16(%[A3],%[j],4), %%xmm15 \n\t" +// "movups (%[A2],%[j],4), %%xmm10 \n\t" +// "movups 16(%[A2],%[j],4), %%xmm14 \n\t" +// "movups (%[A1],%[j],4), %%xmm9 \n\t" +// "movups 16(%[A1],%[j],4), %%xmm13 \n\t" +// +// "mulps %[ptmp0], %%xmm8 \n\t" +// "addps (%[res0],%[j],4), %%xmm8 \n\t" +// "mulps %[ptmp3], %%xmm11 \n\t" +// "addps %%xmm11, %%xmm8 \n\t" +// "mulps %[ptmp2], %%xmm10 \n\t" +// "addps %%xmm10, %%xmm8 \n\t" +// "mulps %[ptmp1], %%xmm9 \n\t" +// "addps %%xmm9, %%xmm8 \n\t" +// "movaps %%xmm8, (%[res0],%[j],4) \n\t" +// +// "mulps %[ptmp0], %%xmm12 \n\t" +// "addps 16(%[res0],%[j],4), %%xmm12 \n\t" +// "mulps %[ptmp3], %%xmm15 \n\t" +// "addps %%xmm15, %%xmm12 \n\t" +// "mulps %[ptmp2], %%xmm14 \n\t" +// "addps %%xmm14, %%xmm12 \n\t" +// "mulps %[ptmp1], %%xmm13 \n\t" +// "addps %%xmm13, %%xmm12 \n\t" +// "movaps %%xmm12, 16(%[res0],%[j],4) \n\t" +// : +// : [res0] "r" (X), [j] "r" (j),[A0] "r" (A0), +// [A1] "r" (A1), +// [A2] "r" (A2), +// [A3] "r" (A3), +// [ptmp0] "x" (ptmp0), +// [ptmp1] "x" (ptmp1), +// [ptmp2] "x" (ptmp2), +// [ptmp3] "x" (ptmp3) +// : "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "%r14"); +// } + register Packet A00; + register Packet A01; + register Packet A02; + register Packet A03; + register Packet A10; + register Packet A11; + register Packet A12; + register Packet A13; + for (int j = 0;j<ANP;j+=2*PacketSize) { +// A00 = ei_pload(&A0[j]); +// A01 = ei_ploadu(&A1[j]); +// A02 = ei_ploadu(&A2[j]); +// A03 = ei_ploadu(&A3[j]); +// A10 = ei_pload(&A0[j+PacketSize]); +// A11 = ei_ploadu(&A1[j+PacketSize]); +// A12 = ei_ploadu(&A2[j+PacketSize]); +// A13 = ei_ploadu(&A3[j+PacketSize]); +// +// A00 = ei_pmul(ptmp0, A00); +// A01 = ei_pmul(ptmp1, A01); +// A02 = ei_pmul(ptmp2, A02); +// A03 = ei_pmul(ptmp3, A03); +// A10 = ei_pmul(ptmp0, A10); +// A11 = ei_pmul(ptmp1, A11); +// A12 = ei_pmul(ptmp2, A12); +// A13 = ei_pmul(ptmp3, A13); +// +// A00 = ei_padd(A00,A01); +// A02 = ei_padd(A02,A03); +// A00 = ei_padd(A00,ei_pload(&X[j])); +// A00 = ei_padd(A00,A02); +// ei_pstore(&X[j],A00); +// +// A10 = ei_padd(A10,A11); +// A12 = ei_padd(A12,A13); +// A10 = ei_padd(A10,ei_pload(&X[j+PacketSize])); +// A10 = ei_padd(A10,A12); +// ei_pstore(&X[j+PacketSize],A10); + ei_pstore(&X[j], ei_padd(ei_pload(&X[j]), ei_padd( - ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))), - ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) ))); + ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j])),ei_pmul(ptmp1,ei_ploadu(&A1[j]))), + ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j])),ei_pmul(ptmp3,ei_ploadu(&A3[j]))) ))); ei_pstore(&X[j+PacketSize], ei_padd(ei_pload(&X[j+PacketSize]), ei_padd( - ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+PacketSize+iN1]))), - ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+PacketSize+iN3]))) ))); - - ei_pstore(&X[j+2*PacketSize], - ei_padd(ei_pload(&X[j+2*PacketSize]), - ei_padd( - ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+2*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+2*PacketSize+iN1]))), - ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+2*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+2*PacketSize+iN3]))) ))); - - ei_pstore(&X[j+3*PacketSize], - ei_padd(ei_pload(&X[j+3*PacketSize]), - ei_padd( - ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+3*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+3*PacketSize+iN1]))), - ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+3*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+3*PacketSize+iN3]))) ))); - + ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j+PacketSize])),ei_pmul(ptmp1,ei_ploadu(&A1[j+PacketSize]))), + ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j+PacketSize])),ei_pmul(ptmp3,ei_ploadu(&A3[j+PacketSize]))) ))); } for (int j = ANP;j<AN;j+=PacketSize) ei_pstore(&X[j], ei_padd(ei_pload(&X[j]), ei_padd( - ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))), - ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) ))); - } + ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j])),ei_pmul(ptmp1,ei_ploadu(&A1[j]))), + ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j])),ei_pmul(ptmp3,ei_ploadu(&A3[j]))) ))); } // process remaining scalars for (int j=AN;j<N;j++) - X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1]; + X[j] += ei_pfirst(ptmp0) * A0[j] + ei_pfirst(ptmp1) * A1[j] + ei_pfirst(ptmp2) * A2[j] + ei_pfirst(ptmp3) * A3[j]; } for (int i=bound;i<N;i++) { @@ -185,6 +241,119 @@ static inline void matrix_vector_product(const gene_matrix & A, const gene_vecto // { // asm("#begin matrix_vector_product"); // int AN = (N/PacketSize)*PacketSize; +// int ANP = (AN/(2*PacketSize))*2*PacketSize; +// int bound = (N/4)*4; +// for (int i=0;i<N;i++) +// X[i] = 0; +// +// for (int i=0;i<bound;i+=4) +// { +// real tmp0 = B[i]; +// Packet ptmp0 = ei_pset1(tmp0); +// real tmp1 = B[i+1]; +// Packet ptmp1 = ei_pset1(tmp1); +// real tmp2 = B[i+2]; +// Packet ptmp2 = ei_pset1(tmp2); +// real tmp3 = B[i+3]; +// Packet ptmp3 = ei_pset1(tmp3); +// int iN0 = i*N; +// int iN1 = (i+1)*N; +// int iN2 = (i+2)*N; +// int iN3 = (i+3)*N; +// if (AN>0) +// { +// // int aligned0 = (iN0 % PacketSize); +// int aligned1 = (iN1 % PacketSize); +// +// if (aligned1==0) +// { +// for (int j = 0;j<AN;j+=PacketSize) +// { +// ei_pstore(&X[j], +// ei_padd(ei_pload(&X[j]), +// ei_padd( +// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_pload(&A[j+iN1]))), +// ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_pload(&A[j+iN3]))) ))); +// } +// } +// else if (aligned1==2) +// { +// for (int j = 0;j<AN;j+=PacketSize) +// { +// ei_pstore(&X[j], +// ei_padd(ei_pload(&X[j]), +// ei_padd( +// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))), +// ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) ))); +// } +// } +// else +// { +// for (int j = 0;j<ANP;j+=2*PacketSize) +// { +// ei_pstore(&X[j], +// ei_padd(ei_pload(&X[j]), +// ei_padd( +// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))), +// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) ))); +// +// ei_pstore(&X[j+PacketSize], +// ei_padd(ei_pload(&X[j+PacketSize]), +// ei_padd( +// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+PacketSize+iN1]))), +// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+PacketSize+iN3]))) ))); +// +// // ei_pstore(&X[j+2*PacketSize], +// // ei_padd(ei_pload(&X[j+2*PacketSize]), +// // ei_padd( +// // ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+2*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+2*PacketSize+iN1]))), +// // ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+2*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+2*PacketSize+iN3]))) ))); +// // +// // ei_pstore(&X[j+3*PacketSize], +// // ei_padd(ei_pload(&X[j+3*PacketSize]), +// // ei_padd( +// // ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+3*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+3*PacketSize+iN1]))), +// // ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+3*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+3*PacketSize+iN3]))) ))); +// +// } +// for (int j = ANP;j<AN;j+=PacketSize) +// ei_pstore(&X[j], +// ei_padd(ei_pload(&X[j]), +// ei_padd( +// ei_padd(ei_pmul(ptmp0,ei_ploadu(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))), +// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) ))); +// } +// } +// // process remaining scalars +// for (int j=AN;j<N;j++) +// X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1] + tmp2 * A[j+iN2] + tmp3 * A[j+iN3]; +// } +// for (int i=bound;i<N;i++) +// { +// real tmp0 = B[i]; +// Packet ptmp0 = ei_pset1(tmp0); +// int iN0 = i*N; +// if (AN>0) +// { +// bool aligned0 = (iN0 % PacketSize) == 0; +// if (aligned0) +// for (int j = 0;j<AN;j+=PacketSize) +// ei_pstore(&X[j], ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pload(&X[j]))); +// else +// for (int j = 0;j<AN;j+=PacketSize) +// ei_pstore(&X[j], ei_padd(ei_pmul(ptmp0,ei_ploadu(&A[j+iN0])),ei_pload(&X[j]))); +// } +// // process remaining scalars +// for (int j=AN;j<N;j++) +// X[j] += tmp0 * A[j+iN0]; +// } +// asm("#end matrix_vector_product"); +// } + +// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N) +// { +// asm("#begin matrix_vector_product"); +// int AN = (N/PacketSize)*PacketSize; // for (int i=0;i<N;i++) // X[i] = 0; // diff --git a/bench/btl/libs/mtl4/main.cpp b/bench/btl/libs/mtl4/main.cpp index 8cf1f6fa9..94c81beac 100644 --- a/bench/btl/libs/mtl4/main.cpp +++ b/bench/btl/libs/mtl4/main.cpp @@ -18,13 +18,8 @@ #include "utilities.h" #include "mtl4_interface.hh" #include "bench.hh" -#include "action_matrix_vector_product.hh" -#include "action_matrix_matrix_product.hh" -#include "action_axpy.hh" -#include "action_lu_solve.hh" -#include "action_ata_product.hh" -#include "action_aat_product.hh" -#include "action_atv_product.hh" +#include "basic_actions.hh" +#include "action_cholesky.hh" BTL_MAIN; @@ -32,12 +27,17 @@ int main() { bench<Action_axpy<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench<Action_axpby<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench<Action_matrix_vector_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_matrix_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); // bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); // bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_trisolve<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_cholesky<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + return 0; } diff --git a/bench/btl/libs/mtl4/mtl4_interface.hh b/bench/btl/libs/mtl4/mtl4_interface.hh index 5beb936db..a299fa83e 100644 --- a/bench/btl/libs/mtl4/mtl4_interface.hh +++ b/bench/btl/libs/mtl4/mtl4_interface.hh @@ -19,6 +19,7 @@ #define MTL4_INTERFACE_HH #include <boost/numeric/mtl/mtl.hpp> +#include <boost/numeric/mtl/operation/cholesky.hpp> #include <vector> using namespace mtl; @@ -81,6 +82,9 @@ public : static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ X = (A*B); +// morton_dense<double, doppled_64_row_mask> C(N,N); +// C = B; +// X = (A*C); } static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ @@ -88,11 +92,11 @@ public : } static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){ -// X = (trans(A)*A); + X = (trans(A)*A); } static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){ -// X = (A*trans(A)); + X = (A*trans(A)); } static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ @@ -107,6 +111,19 @@ public : Y += coef * X; } + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + Y = a*X + b*Y; + } + + static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){ + C = X; + recursive_cholesky(C); + } + + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ + X = lower_trisolve(L, B); + } + static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ cible = source; } diff --git a/bench/btl/libs/ublas/main.cpp b/bench/btl/libs/ublas/main.cpp index acc59364f..6d286e1a3 100644 --- a/bench/btl/libs/ublas/main.cpp +++ b/bench/btl/libs/ublas/main.cpp @@ -20,18 +20,15 @@ #include "utilities.h" #include "ublas_interface.hh" #include "bench.hh" -#include "action_matrix_vector_product.hh" -#include "action_matrix_matrix_product.hh" -#include "action_axpy.hh" -#include "action_ata_product.hh" -#include "action_aat_product.hh" -#include "action_atv_product.hh" +#include "basic_actions.hh" +#include "basic_actions.hh" BTL_MAIN; int main() { bench<Action_axpy<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench<Action_axpby<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench<Action_matrix_vector_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); @@ -40,6 +37,8 @@ int main() bench<Action_ata_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_aat_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_trisolve<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + return 0; } diff --git a/bench/btl/libs/ublas/ublas_interface.hh b/bench/btl/libs/ublas/ublas_interface.hh index 2572f8c21..95cad5195 100644 --- a/bench/btl/libs/ublas/ublas_interface.hh +++ b/bench/btl/libs/ublas/ublas_interface.hh @@ -23,7 +23,9 @@ #include <boost/numeric/ublas/vector.hpp> #include <boost/numeric/ublas/matrix.hpp> #include <boost/numeric/ublas/io.hpp> +#include <boost/numeric/ublas/triangular.hpp> +using namespace boost::numeric; template <class real> class ublas_interface{ @@ -116,6 +118,10 @@ public : Y.plus_assign(coef*X); } + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + Y = a*X + b*Y; + } + static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ // X = prod(trans(A),A); X.assign(prod(trans(A),A)); @@ -126,6 +132,10 @@ public : X.assign(prod(A,trans(A))); } + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ + X = solve(L, B, ublas::lower_tag ()); + } + }; #endif |