aboutsummaryrefslogtreecommitdiffhomepage
path: root/bench/btl
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2008-07-27 11:39:47 +0000
committerGravatar Gael Guennebaud <g.gael@free.fr>2008-07-27 11:39:47 +0000
commit93115619c23bb41fd24b0090cb6adec501edaced (patch)
tree60ae0887b0705b8a994f8ef9baa5324c87883861 /bench/btl
parente9e5261664cc77049f8b77a2c36c535fbd44889c (diff)
* updated benchmark files according to recent renamings
* various improvements in BTL including trisolver and cholesky bench
Diffstat (limited to 'bench/btl')
-rw-r--r--bench/btl/CMakeLists.txt6
-rw-r--r--bench/btl/README13
-rw-r--r--bench/btl/actions/action_axpby.hh124
-rw-r--r--bench/btl/actions/action_cholesky.hh132
-rw-r--r--bench/btl/actions/action_matrix_vector_product.hh5
-rw-r--r--bench/btl/actions/action_trisolve.hh136
-rw-r--r--bench/btl/actions/basic_actions.hh15
-rw-r--r--bench/btl/cmake/FindATLAS.cmake35
-rw-r--r--bench/btl/data/CMakeLists.txt2
-rw-r--r--bench/btl/data/axpby.hh3
-rw-r--r--bench/btl/data/cholesky.hh3
-rwxr-xr-xbench/btl/data/go_mean12
-rw-r--r--bench/btl/data/mean.cxx25
-rw-r--r--bench/btl/data/trisolve.hh3
-rw-r--r--bench/btl/generic_bench/bench.hh81
-rw-r--r--bench/btl/generic_bench/bench_parameter.hh2
-rw-r--r--bench/btl/generic_bench/btl.hh33
-rw-r--r--bench/btl/generic_bench/init/init_vector.hh8
-rw-r--r--bench/btl/generic_bench/static/bench_static.hh2
-rw-r--r--bench/btl/generic_bench/timers/portable_perf_analyzer.hh16
-rwxr-xr-xbench/btl/generic_bench/timers/portable_timer.hh54
-rw-r--r--bench/btl/generic_bench/utils/xy_file.hh (renamed from bench/btl/generic_bench/utils/dump_file_x_y.hh)44
-rw-r--r--bench/btl/libs/C_BLAS/CMakeLists.txt20
-rw-r--r--bench/btl/libs/C_BLAS/C_BLAS_interface.hh74
-rw-r--r--bench/btl/libs/C_BLAS/main.cpp22
-rw-r--r--bench/btl/libs/STL/STL_interface.hh16
-rw-r--r--bench/btl/libs/STL/main.cpp9
-rw-r--r--bench/btl/libs/eigen2/CMakeLists.txt25
-rw-r--r--bench/btl/libs/eigen2/eigen2_interface.hh21
-rw-r--r--bench/btl/libs/eigen2/main_adv.cpp34
-rw-r--r--bench/btl/libs/eigen2/main_linear.cpp34
-rw-r--r--bench/btl/libs/eigen2/main_matmat.cpp34
-rw-r--r--bench/btl/libs/eigen2/main_vecmat.cpp (renamed from bench/btl/libs/eigen2/main.cpp)16
-rw-r--r--bench/btl/libs/gmm/gmm_interface.hh10
-rw-r--r--bench/btl/libs/gmm/main.cpp12
-rwxr-xr-xbench/btl/libs/hand_vec/hand_vec_interface.hh295
-rw-r--r--bench/btl/libs/mtl4/main.cpp14
-rw-r--r--bench/btl/libs/mtl4/mtl4_interface.hh21
-rw-r--r--bench/btl/libs/ublas/main.cpp11
-rw-r--r--bench/btl/libs/ublas/ublas_interface.hh10
40 files changed, 1182 insertions, 250 deletions
diff --git a/bench/btl/CMakeLists.txt b/bench/btl/CMakeLists.txt
index 600fc38f1..a7f90a2e9 100644
--- a/bench/btl/CMakeLists.txt
+++ b/bench/btl/CMakeLists.txt
@@ -26,6 +26,11 @@ include_directories(
${PROJECT_SOURCE_DIR}/generic_bench/utils
${PROJECT_SOURCE_DIR}/libs/STL)
+# find_package(MKL)
+# if (MKL_FOUND)
+# add_definitions(-DHAVE_MKL)
+# set(DEFAULT_LIBRARIES ${MKL_LIBRARIES})
+# endif (MKL_FOUND)
MACRO(BTL_ADD_BENCH targetname)
@@ -49,6 +54,7 @@ MACRO(BTL_ADD_BENCH targetname)
IF(BUILD_${targetname})
ADD_EXECUTABLE(${targetname} ${_sources})
ADD_TEST(${targetname} "${targetname}")
+ target_link_libraries(${targetname} ${DEFAULT_LIBRARIES})
ENDIF(BUILD_${targetname})
ENDMACRO(BTL_ADD_BENCH)
diff --git a/bench/btl/README b/bench/btl/README
index 6d5712bb6..787002f9a 100644
--- a/bench/btl/README
+++ b/bench/btl/README
@@ -29,7 +29,18 @@ BTL uses cmake / ctest:
$ ctest -V
-You can also run a single bench, e.g.: ctest -V -R eigen
+You can run the benchmarks only on libraries matching a given regular expression:
+ ctest -V -R <regexp>
+For instance:
+ ctest -V -R eigen2
+
+You can also select a given set of actions defining the environment variable BTL_CONFIG this way:
+ BTL_CONFIG="-a action1{:action2}*" ctest -V
+An exemple:
+ BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2
+
+Finally, if bench results already exist (the bench*.dat files) then they merges by keeping the best for each matrix size. If you want to overwrite the previous ones you can simply add the "--overwrite" option:
+ BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
4 : Analyze the result. different data files (.dat) are produced in each libs directories.
If gnuplot is available, choose a directory name in the data directory to store the results and type
diff --git a/bench/btl/actions/action_axpby.hh b/bench/btl/actions/action_axpby.hh
new file mode 100644
index 000000000..7a4f158c1
--- /dev/null
+++ b/bench/btl/actions/action_axpby.hh
@@ -0,0 +1,124 @@
+//=====================================================
+// File : action_axpby.hh
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef ACTION_AXPBY
+#define ACTION_AXPBY
+#include "utilities.h"
+#include "STL_interface.hh"
+#include <string>
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+#include "init/init_matrix.hh"
+
+using namespace std;
+
+template<class Interface>
+class Action_axpby {
+
+public :
+
+ // Ctor
+ Action_axpby( int size ):_size(size),_alpha(0.5),_beta(0.95)
+ {
+ MESSAGE("Action_axpby Ctor");
+
+ // STL vector initialization
+ init_vector<pseudo_random>(X_stl,_size);
+ init_vector<pseudo_random>(Y_stl,_size);
+ init_vector<null_function>(resu_stl,_size);
+
+ // generic matrix and vector initialization
+ Interface::vector_from_stl(X_ref,X_stl);
+ Interface::vector_from_stl(Y_ref,Y_stl);
+
+ Interface::vector_from_stl(X,X_stl);
+ Interface::vector_from_stl(Y,Y_stl);
+ }
+
+ // invalidate copy ctor
+ Action_axpby( const Action_axpby & )
+ {
+ INFOS("illegal call to Action_axpby Copy Ctor");
+ exit(1);
+ }
+
+ // Dtor
+ ~Action_axpby( void ){
+ MESSAGE("Action_axpby Dtor");
+
+ // deallocation
+ Interface::free_vector(X_ref);
+ Interface::free_vector(Y_ref);
+
+ Interface::free_vector(X);
+ Interface::free_vector(Y);
+ }
+
+ // action name
+ static inline std::string name( void )
+ {
+ return "axpby_"+Interface::name();
+ }
+
+ double nb_op_base( void ){
+ return 3.0*_size;
+ }
+
+ inline void initialize( void ){
+ Interface::copy_vector(X_ref,X,_size);
+ Interface::copy_vector(Y_ref,Y,_size);
+ }
+
+ inline void calculate( void ) {
+ Interface::axpby(_alpha,X,_beta,Y,_size);
+ }
+
+ void check_result( void ){
+ // calculation check
+ Interface::vector_to_stl(Y,resu_stl);
+
+ STL_interface<typename Interface::real_type>::axpby(_alpha,X_stl,_beta,Y_stl,_size);
+
+ typename Interface::real_type error=
+ STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
+
+ if (error>1.e-6){
+ INFOS("WRONG CALCULATION...residual=" << error);
+ exit(2);
+ }
+ }
+
+private :
+
+ typename Interface::stl_vector X_stl;
+ typename Interface::stl_vector Y_stl;
+ typename Interface::stl_vector resu_stl;
+
+ typename Interface::gene_vector X_ref;
+ typename Interface::gene_vector Y_ref;
+
+ typename Interface::gene_vector X;
+ typename Interface::gene_vector Y;
+
+ typename Interface::real_type _alpha;
+ typename Interface::real_type _beta;
+
+ int _size;
+};
+
+#endif
diff --git a/bench/btl/actions/action_cholesky.hh b/bench/btl/actions/action_cholesky.hh
new file mode 100644
index 000000000..67495b9cb
--- /dev/null
+++ b/bench/btl/actions/action_cholesky.hh
@@ -0,0 +1,132 @@
+//=====================================================
+// File : action_cholesky.hh
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef ACTION_CHOLESKY
+#define ACTION_CHOLESKY
+#include "utilities.h"
+#include "STL_interface.hh"
+#include <string>
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+#include "init/init_matrix.hh"
+
+using namespace std;
+
+template<class Interface>
+class Action_cholesky {
+
+public :
+
+ // Ctor
+
+ Action_cholesky( int size ):_size(size)
+ {
+ MESSAGE("Action_cholesky Ctor");
+
+ // STL vector initialization
+ typename Interface::stl_matrix tmp;
+ init_matrix<pseudo_random>(tmp,_size);
+ init_matrix<null_function>(X_stl,_size);
+ STL_interface<typename Interface::real_type>::ata_product(tmp,X_stl,_size);
+
+ init_matrix<null_function>(C_stl,_size);
+ init_matrix<null_function>(resu_stl,_size);
+
+ // generic matrix and vector initialization
+ Interface::matrix_from_stl(X_ref,X_stl);
+ Interface::matrix_from_stl(X,X_stl);
+ Interface::matrix_from_stl(C,C_stl);
+
+ _cost = 0;
+ for (int j=0; j<_size; ++j)
+ {
+ int r = std::max(_size - j -1,0);
+ _cost += 2*(r*j+r+j);
+ }
+ }
+
+ // invalidate copy ctor
+
+ Action_cholesky( const Action_cholesky & )
+ {
+ INFOS("illegal call to Action_cholesky Copy Ctor");
+ exit(1);
+ }
+
+ // Dtor
+
+ ~Action_cholesky( void ){
+
+ MESSAGE("Action_cholesky Dtor");
+
+ // deallocation
+ Interface::free_matrix(X_ref,_size);
+ Interface::free_matrix(X,_size);
+ Interface::free_matrix(C,_size);
+ }
+
+ // action name
+
+ static inline std::string name( void )
+ {
+ return "cholesky_"+Interface::name();
+ }
+
+ double nb_op_base( void ){
+ return _cost;
+ }
+
+ inline void initialize( void ){
+ Interface::copy_matrix(X_ref,X,_size);
+ }
+
+ inline void calculate( void ) {
+ Interface::cholesky(X,C,_size);
+ }
+
+ void check_result( void ){
+ // calculation check
+ Interface::matrix_to_stl(C,resu_stl);
+
+// STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
+//
+// typename Interface::real_type error=
+// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
+//
+// if (error>1.e-6){
+// INFOS("WRONG CALCULATION...residual=" << error);
+// exit(0);
+// }
+
+ }
+
+private :
+
+ typename Interface::stl_matrix X_stl;
+ typename Interface::stl_matrix C_stl;
+ typename Interface::stl_matrix resu_stl;
+
+ typename Interface::gene_matrix X_ref;
+ typename Interface::gene_matrix X;
+ typename Interface::gene_matrix C;
+
+ int _size;
+ int _cost;
+};
+
+#endif
diff --git a/bench/btl/actions/action_matrix_vector_product.hh b/bench/btl/actions/action_matrix_vector_product.hh
index 3a172f292..202bc1a85 100644
--- a/bench/btl/actions/action_matrix_vector_product.hh
+++ b/bench/btl/actions/action_matrix_vector_product.hh
@@ -49,11 +49,10 @@ public :
// generic matrix and vector initialization
Interface::matrix_from_stl(A_ref,A_stl);
- Interface::vector_from_stl(B_ref,B_stl);
- Interface::vector_from_stl(X_ref,X_stl);
-
Interface::matrix_from_stl(A,A_stl);
+ Interface::vector_from_stl(B_ref,B_stl);
Interface::vector_from_stl(B,B_stl);
+ Interface::vector_from_stl(X_ref,X_stl);
Interface::vector_from_stl(X,X_stl);
}
diff --git a/bench/btl/actions/action_trisolve.hh b/bench/btl/actions/action_trisolve.hh
new file mode 100644
index 000000000..f97f7dbc9
--- /dev/null
+++ b/bench/btl/actions/action_trisolve.hh
@@ -0,0 +1,136 @@
+//=====================================================
+// File : action_trisolve.hh
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef ACTION_TRISOLVE
+#define ACTION_TRISOLVE
+#include "utilities.h"
+#include "STL_interface.hh"
+#include <string>
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+#include "init/init_matrix.hh"
+
+using namespace std;
+
+template<class Interface>
+class Action_trisolve {
+
+public :
+
+ // Ctor
+
+ Action_trisolve( int size ):_size(size)
+ {
+ MESSAGE("Action_trisolve Ctor");
+
+ // STL vector initialization
+ init_matrix<pseudo_random>(L_stl,_size);
+ init_vector<pseudo_random>(B_stl,_size);
+ init_vector<null_function>(X_stl,_size);
+ for (int j=0; j<_size; ++j)
+ {
+ for (int i=0; i<j; ++i)
+ L_stl[j][i] = 0;
+ L_stl[j][j] += 3;
+ }
+
+ init_vector<null_function>(resu_stl,_size);
+
+ // generic matrix and vector initialization
+ Interface::matrix_from_stl(L,L_stl);
+ Interface::vector_from_stl(X,X_stl);
+ Interface::vector_from_stl(B,B_stl);
+
+ _cost = 0;
+ for (int j=0; j<_size; ++j)
+ {
+ _cost += 2*j + 1;
+ }
+ }
+
+ // invalidate copy ctor
+
+ Action_trisolve( const Action_trisolve & )
+ {
+ INFOS("illegal call to Action_trisolve Copy Ctor");
+ exit(1);
+ }
+
+ // Dtor
+
+ ~Action_trisolve( void ){
+
+ MESSAGE("Action_trisolve Dtor");
+
+ // deallocation
+ Interface::free_matrix(L,_size);
+ Interface::free_vector(B);
+ Interface::free_vector(X);
+ }
+
+ // action name
+
+ static inline std::string name( void )
+ {
+ return "trisolve_"+Interface::name();
+ }
+
+ double nb_op_base( void ){
+ return _cost;
+ }
+
+ inline void initialize( void ){
+ //Interface::copy_vector(X_ref,X,_size);
+ }
+
+ inline void calculate( void ) {
+ Interface::trisolve_lower(L,B,X,_size);
+ }
+
+ void check_result( void ){
+ // calculation check
+ Interface::vector_to_stl(X,resu_stl);
+
+ STL_interface<typename Interface::real_type>::trisolve_lower(L_stl,B_stl,X_stl,_size);
+
+ typename Interface::real_type error=
+ STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+
+ if (error>1.e-4){
+ INFOS("WRONG CALCULATION...residual=" << error);
+ exit(2);
+ } //else INFOS("CALCULATION OK...residual=" << error);
+
+ }
+
+private :
+
+ typename Interface::stl_matrix L_stl;
+ typename Interface::stl_vector X_stl;
+ typename Interface::stl_vector B_stl;
+ typename Interface::stl_vector resu_stl;
+
+ typename Interface::gene_matrix L;
+ typename Interface::gene_vector X;
+ typename Interface::gene_vector B;
+
+ int _size;
+ int _cost;
+};
+
+#endif
diff --git a/bench/btl/actions/basic_actions.hh b/bench/btl/actions/basic_actions.hh
new file mode 100644
index 000000000..fd2e74950
--- /dev/null
+++ b/bench/btl/actions/basic_actions.hh
@@ -0,0 +1,15 @@
+
+#include "action_axpy.hh"
+#include "action_axpby.hh"
+
+#include "action_matrix_vector_product.hh"
+#include "action_atv_product.hh"
+
+#include "action_matrix_matrix_product.hh"
+#include "action_ata_product.hh"
+#include "action_aat_product.hh"
+
+#include "action_trisolve.hh"
+
+// #include "action_lu_solve.hh"
+
diff --git a/bench/btl/cmake/FindATLAS.cmake b/bench/btl/cmake/FindATLAS.cmake
new file mode 100644
index 000000000..1a7d7e85a
--- /dev/null
+++ b/bench/btl/cmake/FindATLAS.cmake
@@ -0,0 +1,35 @@
+# include(FindLibraryWithDebug)
+
+if (ATLAS_INCLUDES AND ATLAS_LIBRARIES)
+ set(ATLAS_FIND_QUIETLY TRUE)
+endif (ATLAS_INCLUDES AND ATLAS_LIBRARIES)
+
+find_path(ATLAS_INCLUDES
+ NAMES
+ cblas.h
+ PATHS
+ $ENV{ATLASDIR}/include
+ ${INCLUDE_INSTALL_DIR}
+)
+
+find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+
+find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+
+find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+
+find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+
+if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
+set(ATLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS} ${ATLAS_LAPACK} ${ATLAS_F77BLAS})
+endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(ATLAS DEFAULT_MSG
+ ATLAS_INCLUDES ATLAS_LIBRARIES)
+
+mark_as_advanced(ATLAS_INCLUDES ATLAS_LIBRARIES)
diff --git a/bench/btl/data/CMakeLists.txt b/bench/btl/data/CMakeLists.txt
index decfc45e9..9b7e627d3 100644
--- a/bench/btl/data/CMakeLists.txt
+++ b/bench/btl/data/CMakeLists.txt
@@ -1,7 +1,7 @@
ADD_CUSTOM_TARGET(copy_scripts)
-SET(script_files go_mean aat.hh ata.hh axpy.hh order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh )
+SET(script_files go_mean aat.hh ata.hh axpy.hh axpby.hh cholesky.hh trisolve.hh order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh )
FOREACH(script_file ${script_files})
ADD_CUSTOM_COMMAND(
diff --git a/bench/btl/data/axpby.hh b/bench/btl/data/axpby.hh
new file mode 100644
index 000000000..744e3063f
--- /dev/null
+++ b/bench/btl/data/axpby.hh
@@ -0,0 +1,3 @@
+set title "{/*1.5 Y = alpha * X + beta * Y}" 0.000000,0.000000
+set xlabel "vector size" 0.000000,0.000000
+set xrange [1:1000000]
diff --git a/bench/btl/data/cholesky.hh b/bench/btl/data/cholesky.hh
new file mode 100644
index 000000000..78336b4ee
--- /dev/null
+++ b/bench/btl/data/cholesky.hh
@@ -0,0 +1,3 @@
+set title "{/*1.5 Cholesky decomposition}" 0.000000,0.000000
+set xlabel "matrix size" 0.000000,0.000000
+set xrange [6:1000]
diff --git a/bench/btl/data/go_mean b/bench/btl/data/go_mean
index 422b357a6..05835a8c4 100755
--- a/bench/btl/data/go_mean
+++ b/bench/btl/data/go_mean
@@ -17,11 +17,13 @@ echo '<ul>'\
'</p>' >> $webpagefilename
source mk_mean_script.sh axpy $1 11 2500 100000 250000 > $1/axpy.html
-source mk_mean_script.sh matrix_vector $1 11 50 300 500 > $1/matrix_vector.html
-source mk_mean_script.sh atv $1 11 50 300 500 > $1/atv.html
-# source mk_mean_script.sh matrix_matrix $1 11 100 300 500 > $1/matrix_matrix.html
-# source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html
-# source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html
+source mk_mean_script.sh matrix_vector $1 11 50 300 1000 > $1/matrix_vector.html
+source mk_mean_script.sh atv $1 11 50 300 1000 > $1/atv.html
+source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 > $1/matrix_matrix.html
+source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html
+source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html
+source mk_mean_script.sh trisolve $1 11 100 300 1000 > $1/trisolve.html
+source mk_mean_script.sh cholesky $1 11 100 300 1000 > $1/cholesky.html
## compile the web page ##
diff --git a/bench/btl/data/mean.cxx b/bench/btl/data/mean.cxx
index 96d37dc4a..3e1a82d22 100644
--- a/bench/btl/data/mean.cxx
+++ b/bench/btl/data/mean.cxx
@@ -23,11 +23,11 @@
#include <iostream>
#include <fstream>
#include "bench_parameter.hh"
+#include "utils/xy_file.hh"
#include <set>
using namespace std;
-void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max);
class Lib_Mean{
@@ -152,29 +152,6 @@ int main( int argc , char *argv[] )
}
-void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
-
- ifstream input_file (filename.c_str(),ios::in) ;
-
- if (!input_file){
- INFOS("!!! Error opening "<<filename);
- exit(0);
- }
-
- int nb_point=0;
- int size=0;
- double mflops=0;
-
- while (input_file >> size >> mflops ){
- nb_point++;
- tab_sizes.push_back(size);
- tab_mflops.push_back(mflops);
- }
- SCRUTE(nb_point);
-
- input_file.close();
-}
-
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){
int size=tab_sizes.size();
diff --git a/bench/btl/data/trisolve.hh b/bench/btl/data/trisolve.hh
new file mode 100644
index 000000000..6c3000f2a
--- /dev/null
+++ b/bench/btl/data/trisolve.hh
@@ -0,0 +1,3 @@
+set title "{/*1.5 triangular solver (X = inv(L) * X)}" 0.000000,0.000000
+set xlabel "matrix-vector size" 0.000000,0.000000
+set xrange [6:1000]
diff --git a/bench/btl/generic_bench/bench.hh b/bench/btl/generic_bench/bench.hh
index cace2695d..bb3ca6667 100644
--- a/bench/btl/generic_bench/bench.hh
+++ b/bench/btl/generic_bench/bench.hh
@@ -25,14 +25,16 @@
#include <iostream>
#include "utilities.h"
#include "size_lin_log.hh"
-#include "dump_file_x_y.hh"
+#include "xy_file.hh"
#include <vector>
#include <string>
#include "timers/portable_perf_analyzer.hh"
-//#include "timers/mixed_perf_analyzer.hh"
-//#include "timers/x86_perf_analyzer.hh"
-//#include "timers/STL_perf_analyzer.hh"
-
+// #include "timers/mixed_perf_analyzer.hh"
+// #include "timers/x86_perf_analyzer.hh"
+// #include "timers/STL_perf_analyzer.hh"
+#ifdef HAVE_MKL
+extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int);
+#endif
using namespace std;
template <template<class> class Perf_Analyzer, class Action>
@@ -41,8 +43,6 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
if (BtlConfig::skipAction(Action::name()))
return;
- BTL_DISABLE_SSE_EXCEPTIONS();
-
string filename="bench_"+Action::name()+".dat";
INFOS("starting " <<filename);
@@ -64,14 +64,71 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
{
//INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush;
+
+ BTL_DISABLE_SSE_EXCEPTIONS();
+ #ifdef HAVE_MKL
+ {
+ float dummy;
+ cblas_saxpy(1,0,&dummy,1,&dummy,1);
+ }
+ #endif
+
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
}
-// std::cout << "\n";
- // dump the result in a file :
+ if (!BtlConfig::Instance.overwriteResults)
+ {
+ std::vector<int> oldSizes;
+ std::vector<double> oldFlops;
+ if (read_xy_file(filename, oldSizes, oldFlops, true))
+ {
+ // merge the two data
+ std::vector<int> newSizes;
+ std::vector<double> newFlops;
+ int i=0;
+ int j=0;
+ while (i<tab_sizes.size() && j<oldSizes.size())
+ {
+ if (tab_sizes[i] == oldSizes[j])
+ {
+ newSizes.push_back(tab_sizes[i]);
+ newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
+ ++i;
+ ++j;
+ }
+ else if (tab_sizes[i] < oldSizes[j])
+ {
+ newSizes.push_back(tab_sizes[i]);
+ newFlops.push_back(tab_mflops[i]);
+ ++i;
+ }
+ else
+ {
+ newSizes.push_back(oldSizes[j]);
+ newFlops.push_back(oldFlops[j]);
+ ++j;
+ }
+ }
+ while (i<tab_sizes.size())
+ {
+ newSizes.push_back(tab_sizes[i]);
+ newFlops.push_back(tab_mflops[i]);
+ ++i;
+ }
+ while (j<oldSizes.size())
+ {
+ newSizes.push_back(oldSizes[j]);
+ newFlops.push_back(oldFlops[j]);
+ ++j;
+ }
+ tab_mflops = newFlops;
+ tab_sizes = newSizes;
+ }
+ }
- dump_file_x_y(tab_sizes,tab_mflops,filename);
+ // dump the result in a file :
+ dump_xy_file(tab_sizes,tab_mflops,filename);
}
@@ -87,8 +144,8 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ){
// Only for small problem size. Otherwize it will be too long
- //bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
- //bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
+// bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
+// bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
}
diff --git a/bench/btl/generic_bench/bench_parameter.hh b/bench/btl/generic_bench/bench_parameter.hh
index 5714cb3c2..62ba20e4c 100644
--- a/bench/btl/generic_bench/bench_parameter.hh
+++ b/bench/btl/generic_bench/bench_parameter.hh
@@ -23,7 +23,7 @@
// minimal time for each measurement
#define REAL_TYPE float
// minimal time for each measurement
-#define MIN_TIME 0.5
+#define MIN_TIME 1.0
// nb of point on bench curves
#define NB_POINT 100
// min vector size for axpy bench
diff --git a/bench/btl/generic_bench/btl.hh b/bench/btl/generic_bench/btl.hh
index 784702432..6d6e048b3 100644
--- a/bench/btl/generic_bench/btl.hh
+++ b/bench/btl/generic_bench/btl.hh
@@ -48,7 +48,7 @@
: : [aux] "m" (aux)); \
}
#else
-#define DISABLE_SSE_EXCEPTIONS()
+#define BTL_DISABLE_SSE_EXCEPTIONS()
#endif
/** Enhanced std::string
@@ -163,7 +163,7 @@ class BtlConfig
{
public:
BtlConfig()
- : m_runSingleAction(false)
+ : overwriteResults(false)
{
char * _config;
_config = getenv ("BTL_CONFIG");
@@ -179,33 +179,38 @@ public:
std::cerr << "error processing option: " << config[i] << "\n";
exit(2);
}
- Instance.m_runSingleAction = true;
- Instance.m_singleActionName = config[i+1];
+ Instance.m_selectedActionNames = config[i+1].split(":");
i += 1;
}
+ else if (config[i].beginsWith("--overwrite"))
+ {
+ Instance.overwriteResults = true;
+ }
}
}
BTL_DISABLE_SSE_EXCEPTIONS();
}
- BTL_DONT_INLINE static bool skipAction(const std::string& name)
+ BTL_DONT_INLINE static bool skipAction(const std::string& _name)
{
- if (Instance.m_runSingleAction)
- {
- return !BtlString(name).contains(Instance.m_singleActionName);
- }
+ if (Instance.m_selectedActionNames.empty())
+ return false;
- return false;
+ BtlString name(_name);
+ for (int i=0; i<Instance.m_selectedActionNames.size(); ++i)
+ if (name.contains(Instance.m_selectedActionNames[i]))
+ return false;
+
+ return true;
}
+ static BtlConfig Instance;
+ bool overwriteResults;
protected:
- bool m_runSingleAction;
- BtlString m_singleActionName;
-
- static BtlConfig Instance;
+ std::vector<BtlString> m_selectedActionNames;
};
#define BTL_MAIN \
diff --git a/bench/btl/generic_bench/init/init_vector.hh b/bench/btl/generic_bench/init/init_vector.hh
index faf14f953..efaf0c92e 100644
--- a/bench/btl/generic_bench/init/init_vector.hh
+++ b/bench/btl/generic_bench/init/init_vector.hh
@@ -1,14 +1,14 @@
//=====================================================
// File : init_vector.hh
-// Author : L. Plagne <laurent.plagne@edf.fr)>
+// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:18 CEST 2002
//=====================================================
-//
+//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
-//
+//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@@ -16,7 +16,7 @@
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-//
+//
#ifndef INIT_VECTOR_HH
#define INIT_VECTOR_HH
diff --git a/bench/btl/generic_bench/static/bench_static.hh b/bench/btl/generic_bench/static/bench_static.hh
index cdb645fc2..730590c67 100644
--- a/bench/btl/generic_bench/static/bench_static.hh
+++ b/bench/btl/generic_bench/static/bench_static.hh
@@ -24,7 +24,7 @@
#include "bench_parameter.hh"
#include <iostream>
#include "utilities.h"
-#include "dump_file_x_y.hh"
+#include "xy_file.hh"
#include "static/static_size_generator.hh"
#include "timers/portable_perf_analyzer.hh"
#include "timers/mixed_perf_analyzer.hh"
diff --git a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
index d716154fd..d0fe95ce0 100644
--- a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
@@ -38,16 +38,16 @@ public:
MESSAGE("Portable_Perf_Analyzer Dtor");
};
-
-
BTL_DONT_INLINE double eval_mflops(int size)
{
Action action(size);
-
- double time_action = time_calculate(action);
+ double time_action = 0;
+ action.initialize();
+ time_action = time_calculate(action);
while (time_action < MIN_TIME)
{
+ //Action action(size);
_nb_calc *= 2;
action.initialize();
time_action = time_calculate(action);
@@ -56,8 +56,10 @@ public:
// optimize
for (int i=1; i<NB_TRIES; ++i)
{
- action.initialize();
- time_action = std::min(time_action, time_calculate(action));
+ Action _action(size);
+ std::cout << " " << _action.nb_op_base()*_nb_calc/(time_action*1e6) << " ";
+ _action.initialize();
+ time_action = std::min(time_action, time_calculate(_action));
}
time_action = time_action / (double(_nb_calc));
@@ -66,13 +68,13 @@ public:
action.initialize();
action.calculate();
action.check_result();
-
return action.nb_op_base()/(time_action*1000000.0);
}
BTL_DONT_INLINE double time_calculate(Action & action)
{
// time measurement
+ action.calculate();
_chronos.start();
for (int ii=0;ii<_nb_calc;ii++)
{
diff --git a/bench/btl/generic_bench/timers/portable_timer.hh b/bench/btl/generic_bench/timers/portable_timer.hh
index 7d4059464..de50394f9 100755
--- a/bench/btl/generic_bench/timers/portable_timer.hh
+++ b/bench/btl/generic_bench/timers/portable_timer.hh
@@ -39,6 +39,38 @@
// A timer object measures CPU time.
+// class Portable_Timer
+// {
+// public:
+//
+// Portable_Timer( void )
+// {
+// }
+//
+//
+// void start() { m_val = getTime(); }
+//
+// void stop() { m_val = getTime() - m_val; }
+//
+// double elapsed() { return m_val; }
+//
+// double user_time() { return elapsed(); }
+//
+//
+// private:
+//
+// static inline double getTime(void)
+// {
+// struct timeval tv;
+// struct timezone tz;
+// gettimeofday(&tv, &tz);
+// return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
+// }
+//
+// double m_val;
+//
+// }; // Portable_Timer
+
class Portable_Timer
{
public:
@@ -46,42 +78,42 @@ class Portable_Timer
Portable_Timer( void ):_utime_sec_start(-1),
_utime_usec_start(-1),
_utime_sec_stop(-1),
- _utime_usec_stop(-1)
+ _utime_usec_stop(-1)/*,
+ m_prev_cs(-1)*/
{
}
void start()
{
-
int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
-
- _start_time = std::clock();
-
+// _start_time = std::clock();
_utime_sec_start = resourcesUsage.ru_utime.tv_sec ;
_utime_usec_start = resourcesUsage.ru_utime.tv_usec ;
+// m_prev_cs = resourcesUsage.ru_nivcsw;
}
void stop()
{
-
int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
-
- _stop_time = std::clock();
-
+// _stop_time = std::clock();
_utime_sec_stop = resourcesUsage.ru_utime.tv_sec ;
_utime_usec_stop = resourcesUsage.ru_utime.tv_usec ;
+// m_prev_cs = resourcesUsage.ru_nivcsw - m_prev_cs;
+// std::cerr << resourcesUsage.ru_nvcsw << " + " << resourcesUsage.ru_nivcsw << "\n";
+
}
double elapsed()
{
- return double(_stop_time - _start_time) / CLOCKS_PER_SEC;
+ return user_time();//double(_stop_time - _start_time) / CLOCKS_PER_SEC;
}
double user_time()
{
+// std::cout << m_prev_cs << "\n";
long tot_utime_sec=_utime_sec_stop-_utime_sec_start;
long tot_utime_usec=_utime_usec_stop-_utime_usec_start;
return double(tot_utime_sec)+ double(tot_utime_usec)/double(USEC_IN_SEC) ;
@@ -98,6 +130,8 @@ private:
long _utime_sec_stop ;
long _utime_usec_stop ;
+// long m_prev_cs;
+
std::clock_t _start_time;
std::clock_t _stop_time;
diff --git a/bench/btl/generic_bench/utils/dump_file_x_y.hh b/bench/btl/generic_bench/utils/xy_file.hh
index 6b99dcb10..4571bed8f 100644
--- a/bench/btl/generic_bench/utils/dump_file_x_y.hh
+++ b/bench/btl/generic_bench/utils/xy_file.hh
@@ -17,10 +17,41 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
-#ifndef DUMP_FILE_X_Y_HH
-#define DUMP_FILE_X_Y_HH
+#ifndef XY_FILE_HH
+#define XY_FILE_HH
#include <fstream>
+#include <iostream>
#include <string>
+#include <vector>
+using namespace std;
+
+bool read_xy_file(const std::string & filename, std::vector<int> & tab_sizes,
+ std::vector<double> & tab_mflops, bool quiet = false)
+{
+
+ std::ifstream input_file (filename.c_str(),std::ios::in);
+
+ if (!input_file){
+ if (!quiet) {
+ INFOS("!!! Error opening "<<filename);
+ }
+ return false;
+ }
+
+ int nb_point=0;
+ int size=0;
+ double mflops=0;
+
+ while (input_file >> size >> mflops ){
+ nb_point++;
+ tab_sizes.push_back(size);
+ tab_mflops.push_back(mflops);
+ }
+ SCRUTE(nb_point);
+
+ input_file.close();
+ return true;
+}
// The Vector class must satisfy the following part of STL vector concept :
// resize() method
@@ -30,16 +61,13 @@
using namespace std;
template<class Vector_A, class Vector_B>
-void dump_file_x_y(const Vector_A & X, const Vector_B & Y, const std::string & filename){
+void dump_xy_file(const Vector_A & X, const Vector_B & Y, const std::string & filename){
ofstream outfile (filename.c_str(),ios::out) ;
int size=X.size();
- for (int i=0;i<size;i++){
-
- outfile << X[i] << " " << Y[i] << endl ;
-
- }
+ for (int i=0;i<size;i++)
+ outfile << X[i] << " " << Y[i] << endl;
outfile.close();
}
diff --git a/bench/btl/libs/C_BLAS/CMakeLists.txt b/bench/btl/libs/C_BLAS/CMakeLists.txt
index 73e5cc46b..f72e9caea 100644
--- a/bench/btl/libs/C_BLAS/CMakeLists.txt
+++ b/bench/btl/libs/C_BLAS/CMakeLists.txt
@@ -1,13 +1,13 @@
-find_package(CBLAS)
-if (CBLAS_FOUND)
- include_directories(${CBLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77)
- btl_add_bench(btl_cblas main.cpp)
- if(BUILD_btl_cblas)
- target_link_libraries(btl_cblas ${CBLAS_LIBRARIES})
- set_target_properties(btl_cblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS")
- endif(BUILD_btl_cblas)
-endif (CBLAS_FOUND)
+find_package(ATLAS)
+if (ATLAS_FOUND)
+ include_directories(${ATLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77)
+ btl_add_bench(btl_atlas main.cpp)
+ if(BUILD_btl_atlas)
+ target_link_libraries(btl_atlas ${ATLAS_LIBRARIES})
+ set_target_properties(btl_atlas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1")
+ endif(BUILD_btl_atlas)
+endif (ATLAS_FOUND)
find_package(MKL)
if (MKL_FOUND)
@@ -15,6 +15,6 @@ if (MKL_FOUND)
btl_add_bench(btl_mkl main.cpp)
if(BUILD_btl_mkl)
target_link_libraries(btl_mkl ${MKL_LIBRARIES})
- set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL")
+ set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1")
endif(BUILD_btl_mkl)
endif (MKL_FOUND)
diff --git a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh
index eadbf0dd2..49ae90f3d 100644
--- a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh
+++ b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh
@@ -25,6 +25,11 @@
extern "C"
{
#include "cblas.h"
+#ifdef HAS_LAPACK
+ // Cholesky Factorization
+ void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info);
+ void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
+#endif
}
#define MAKE_STRING2(S) #S
@@ -53,31 +58,31 @@ public :
cblas_dgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
}
- static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
- {
+ static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
}
- static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
- {
+ static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
cblas_dgemm(CblasColMajor,CblasTrans,CblasTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
}
- static inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
- {
+ static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
- static inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
- {
+ static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
- static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
- {
+ static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
cblas_daxpy(N,coef,X,1,Y,1);
}
+ static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+ cblas_dscal(N,b,Y,1);
+ cblas_daxpy(N,a,X,1,Y,1);
+ }
+
};
template<>
@@ -90,41 +95,62 @@ public :
return MAKE_STRING(CBLASNAME);
}
- static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
- {
+ static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
cblas_sgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
}
- static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
- {
+ static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
cblas_sgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
}
- static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
- {
+ static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
}
- static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
- {
+ static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
}
- static inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
- {
+ static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
cblas_sgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
- static inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
- {
+ static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
- static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N)
- {
+ static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N){
cblas_saxpy(N,coef,X,1,Y,1);
}
+ static inline void axpby(float a, const gene_vector & X, float b, gene_vector & Y, int N){
+ cblas_sscal(N,b,Y,1);
+ cblas_saxpy(N,a,X,1,Y,1);
+ }
+
+ #ifdef HAS_LAPACK
+ static inline void cholesky(const gene_vector & X, gene_vector & C, int N){
+ cblas_scopy(N*N, X, 1, C, 1);
+ char uplo = 'L';
+ int info = 0;
+ spotrf_(&uplo, &N, C, &N, &info);
+// float tmp[N];
+// for (int j = 1; j < N; ++j)
+// {
+// int endSize = N-j-1;
+// if (endSize>0) {
+ //cblas_sgemv(CblasColMajor, CblasNoTrans, N-j-1, j, ATL_rnone, A+j+1, lda, A+j, lda, ATL_rone, Ac+j+1, 1);
+// cblas_sgemv(CblasColMajor, CblasNoTrans,endSize,j, 1.0, &(C[j+1]),N, &(C[j]),N, 0.0, &(C[j+1+N*j]),1);
+// }
+// }
+ }
+ #endif
+
+ static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+ cblas_scopy(N, B, 1, X, 1);
+ cblas_strsv(CblasColMajor, CblasLower, CblasNoTrans, CblasNonUnit, N, L, N, X, 1);
+ }
+
};
diff --git a/bench/btl/libs/C_BLAS/main.cpp b/bench/btl/libs/C_BLAS/main.cpp
index bef5d30be..3e38d6985 100644
--- a/bench/btl/libs/C_BLAS/main.cpp
+++ b/bench/btl/libs/C_BLAS/main.cpp
@@ -20,13 +20,11 @@
#include "utilities.h"
#include "C_BLAS_interface.hh"
#include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_atv_product.hh"
-#include "action_axpy.hh"
-#include "action_lu_solve.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
+#include "basic_actions.hh"
+
+#ifdef HAS_LAPACK
+#include "action_cholesky.hh"
+#endif
BTL_MAIN;
@@ -34,17 +32,21 @@ int main()
{
bench<Action_axpy<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+ bench<Action_axpby<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-
bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-
bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-
bench<Action_aat_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ bench<Action_trisolve<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
+ #ifdef HAS_LAPACK
+ bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ #endif
+
//bench<Action_lu_solve<C_BLAS_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
return 0;
diff --git a/bench/btl/libs/STL/STL_interface.hh b/bench/btl/libs/STL/STL_interface.hh
index 5b1a384af..d7ef9a215 100644
--- a/bench/btl/libs/STL/STL_interface.hh
+++ b/bench/btl/libs/STL/STL_interface.hh
@@ -146,6 +146,22 @@ public :
Y[i]+=coef*X[i];
}
+ static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+ for (int i=0;i<N;i++)
+ Y[i] = a*X[i] + b*Y[i];
+ }
+
+ static inline void trisolve_lower(const gene_matrix & L, const gene_vector & B, gene_vector & X, int N){
+ copy_vector(B,X,N);
+ for(int i=0; i<N; ++i)
+ {
+ X[i] /= L[i][i];
+ real tmp = X[i];
+ for (int j=i+1; j<N; ++j)
+ X[j] -= tmp * L[i][j];
+ }
+ }
+
static inline real norm_diff(const stl_vector & A, const stl_vector & B)
{
int N=A.size();
diff --git a/bench/btl/libs/STL/main.cpp b/bench/btl/libs/STL/main.cpp
index 4cb846633..35f186402 100644
--- a/bench/btl/libs/STL/main.cpp
+++ b/bench/btl/libs/STL/main.cpp
@@ -20,19 +20,14 @@
#include "utilities.h"
#include "STL_interface.hh"
#include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_axpy.hh"
-#include "action_lu_solve.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-#include "action_atv_product.hh"
+#include "basic_actions.hh"
BTL_MAIN;
int main()
{
bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+ bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
diff --git a/bench/btl/libs/eigen2/CMakeLists.txt b/bench/btl/libs/eigen2/CMakeLists.txt
index ba55ef5f0..f452dc901 100644
--- a/bench/btl/libs/eigen2/CMakeLists.txt
+++ b/bench/btl/libs/eigen2/CMakeLists.txt
@@ -3,13 +3,28 @@ find_package(Eigen2)
if (EIGEN2_FOUND)
include_directories(${EIGEN2_INCLUDE_DIR})
- btl_add_bench(btl_eigen2 main.cpp)
+ btl_add_bench(btl_eigen2_linear main_linear.cpp)
+ btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp)
+ btl_add_bench(btl_eigen2_matmat main_matmat.cpp)
+ btl_add_bench(btl_eigen2_adv main_adv.cpp)
IF(NOT BTL_NOVEC)
- btl_add_bench(btl_eigen2_novec main.cpp)
- if(BUILD_btl_eigen2_novec)
- set_target_properties(btl_eigen2_novec PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
- endif(BUILD_btl_eigen2_novec)
+ btl_add_bench(btl_eigen2_novec_linear main_linear.cpp)
+ btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp)
+ btl_add_bench(btl_eigen2_novec_matmat main_matmat.cpp)
+ btl_add_bench(btl_eigen2_novec_adv main_adv.cpp)
+ if(BUILD_btl_eigen2_novec_linear)
+ set_target_properties(btl_eigen2_novec_linear PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
+ endif(BUILD_btl_eigen2_novec_linear)
+ if(BUILD_btl_eigen2_novec_vecmat)
+ set_target_properties(btl_eigen2_novec_vecmat PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
+ endif(BUILD_btl_eigen2_novec_vecmat)
+ if(BUILD_btl_eigen2_novec_matmat)
+ set_target_properties(btl_eigen2_novec_matmat PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
+ endif(BUILD_btl_eigen2_novec_matmat)
+ if(BUILD_btl_eigen2_novec_adv)
+ set_target_properties(btl_eigen2_novec_adv PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
+ endif(BUILD_btl_eigen2_novec_adv)
ENDIF(NOT BTL_NOVEC)
btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF)
diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh
index 4ce4af165..13c7058ed 100644
--- a/bench/btl/libs/eigen2/eigen2_interface.hh
+++ b/bench/btl/libs/eigen2/eigen2_interface.hh
@@ -19,6 +19,7 @@
#define EIGEN2_INTERFACE_HH
#include <Eigen/Core>
+#include <Eigen/Cholesky>
#include <vector>
#include "btl.hh"
@@ -107,17 +108,21 @@ public :
}
static inline void matrix_vector_product(const gene_matrix & __restrict__ A, const gene_vector & __restrict__ B, gene_vector & __restrict__ X, int N){
- X = (A*B).lazy();
+ X = (A*B)/*.lazy()*/;
}
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
- X = (A.transpose()*B).lazy();
+ X = (A.transpose()*B)/*.lazy()*/;
}
- static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
+ static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
Y += coef * X;
}
+ static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+ Y = a*X + b*Y;
+ }
+
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
cible = source;
}
@@ -126,6 +131,16 @@ public :
cible = source;
}
+ static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int N){
+ X = L.template marked<Lower>().inverseProduct(B);
+ }
+
+ static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
+// C = X;
+// Cholesky<gene_matrix>::computeInPlace(C);
+ C = X.cholesky().matrixL();
+ }
+
};
#endif
diff --git a/bench/btl/libs/eigen2/main_adv.cpp b/bench/btl/libs/eigen2/main_adv.cpp
new file mode 100644
index 000000000..db38b67f4
--- /dev/null
+++ b/bench/btl/libs/eigen2/main_adv.cpp
@@ -0,0 +1,34 @@
+//=====================================================
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#include "utilities.h"
+#include "eigen2_interface.hh"
+#include "bench.hh"
+#include "action_trisolve.hh"
+#include "action_cholesky.hh"
+
+BTL_MAIN;
+
+int main()
+{
+ bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
+ return 0;
+}
+
+
diff --git a/bench/btl/libs/eigen2/main_linear.cpp b/bench/btl/libs/eigen2/main_linear.cpp
new file mode 100644
index 000000000..e79927b0f
--- /dev/null
+++ b/bench/btl/libs/eigen2/main_linear.cpp
@@ -0,0 +1,34 @@
+//=====================================================
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#include "utilities.h"
+#include "eigen2_interface.hh"
+#include "bench.hh"
+#include "basic_actions.hh"
+
+BTL_MAIN;
+
+int main()
+{
+
+ bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+ bench<Action_axpby<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+
+ return 0;
+}
+
+
diff --git a/bench/btl/libs/eigen2/main_matmat.cpp b/bench/btl/libs/eigen2/main_matmat.cpp
new file mode 100644
index 000000000..bdebc0832
--- /dev/null
+++ b/bench/btl/libs/eigen2/main_matmat.cpp
@@ -0,0 +1,34 @@
+//=====================================================
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#include "utilities.h"
+#include "eigen2_interface.hh"
+#include "bench.hh"
+#include "basic_actions.hh"
+
+BTL_MAIN;
+
+int main()
+{
+ bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
+ return 0;
+}
+
+
diff --git a/bench/btl/libs/eigen2/main.cpp b/bench/btl/libs/eigen2/main_vecmat.cpp
index 086dd75c8..eadc9a885 100644
--- a/bench/btl/libs/eigen2/main.cpp
+++ b/bench/btl/libs/eigen2/main_vecmat.cpp
@@ -18,29 +18,15 @@
#include "utilities.h"
#include "eigen2_interface.hh"
#include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_axpy.hh"
-#include "action_lu_solve.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-#include "action_atv_product.hh"
+#include "basic_actions.hh"
BTL_MAIN;
int main()
{
-
bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-
- bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-// bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-// bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-// bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
- //bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-
return 0;
}
diff --git a/bench/btl/libs/gmm/gmm_interface.hh b/bench/btl/libs/gmm/gmm_interface.hh
index 6a81fe969..7ef41c0cf 100644
--- a/bench/btl/libs/gmm/gmm_interface.hh
+++ b/bench/btl/libs/gmm/gmm_interface.hh
@@ -106,6 +106,10 @@ public :
gmm::add(gmm::scaled(X,coef), Y);
}
+ static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+ gmm::add(gmm::scaled(X,a), gmm::scaled(Y,b), Y);
+ }
+
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
gmm::copy(source,cible);
}
@@ -114,6 +118,12 @@ public :
gmm::copy(source,cible);
}
+ static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+ gmm::copy(B,X);
+ gmm::lower_tri_solve(L, X, false);
+ }
+
+
};
#endif
diff --git a/bench/btl/libs/gmm/main.cpp b/bench/btl/libs/gmm/main.cpp
index a05fd1b46..26fdf76bd 100644
--- a/bench/btl/libs/gmm/main.cpp
+++ b/bench/btl/libs/gmm/main.cpp
@@ -18,13 +18,7 @@
#include "utilities.h"
#include "gmm_interface.hh"
#include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_axpy.hh"
-#include "action_lu_solve.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-#include "action_atv_product.hh"
+#include "basic_actions.hh"
BTL_MAIN;
@@ -32,12 +26,16 @@ int main()
{
bench<Action_axpy<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+ bench<Action_axpby<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+
bench<Action_matrix_vector_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+
bench<Action_matrix_matrix_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ bench<Action_trisolve<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
//bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
return 0;
diff --git a/bench/btl/libs/hand_vec/hand_vec_interface.hh b/bench/btl/libs/hand_vec/hand_vec_interface.hh
index 538c03ba6..fb2e61c75 100755
--- a/bench/btl/libs/hand_vec/hand_vec_interface.hh
+++ b/bench/btl/libs/hand_vec/hand_vec_interface.hh
@@ -68,96 +68,152 @@ public :
#endif
}
-static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
+ static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
{
asm("#begin matrix_vector_product");
int AN = (N/PacketSize)*PacketSize;
- int ANP = (AN/(4*PacketSize))*4*PacketSize;
+ int ANP = (AN/(2*PacketSize))*2*PacketSize;
int bound = (N/4)*4;
for (int i=0;i<N;i++)
X[i] = 0;
for (int i=0;i<bound;i+=4)
{
- real tmp0 = B[i];
- Packet ptmp0 = ei_pset1(tmp0);
- real tmp1 = B[i+1];
- Packet ptmp1 = ei_pset1(tmp1);
- real tmp2 = B[i+2];
- Packet ptmp2 = ei_pset1(tmp2);
- real tmp3 = B[i+3];
- Packet ptmp3 = ei_pset1(tmp3);
- int iN0 = i*N;
- int iN1 = (i+1)*N;
- int iN2 = (i+2)*N;
- int iN3 = (i+3)*N;
+ register real* __restrict__ A0 = A + i*N;
+ register real* __restrict__ A1 = A + (i+1)*N;
+ register real* __restrict__ A2 = A + (i+2)*N;
+ register real* __restrict__ A3 = A + (i+3)*N;
+
+ Packet ptmp0 = ei_pset1(B[i]);
+ Packet ptmp1 = ei_pset1(B[i+1]);
+ Packet ptmp2 = ei_pset1(B[i+2]);
+ Packet ptmp3 = ei_pset1(B[i+3]);
+// register Packet ptmp0, ptmp1, ptmp2, ptmp3;
+// asm(
+//
+// "movss (%[B],%[j],4), %[ptmp0] \n\t"
+// "shufps $0,%[ptmp0],%[ptmp0] \n\t"
+// "movss 4(%[B],%[j],4), %[ptmp1] \n\t"
+// "shufps $0,%[ptmp1],%[ptmp1] \n\t"
+// "movss 8(%[B],%[j],4), %[ptmp2] \n\t"
+// "shufps $0,%[ptmp2],%[ptmp2] \n\t"
+// "movss 12(%[B],%[j],4), %[ptmp3] \n\t"
+// "shufps $0,%[ptmp3],%[ptmp3] \n\t"
+// : [ptmp0] "=x" (ptmp0),
+// [ptmp1] "=x" (ptmp1),
+// [ptmp2] "=x" (ptmp2),
+// [ptmp3] "=x" (ptmp3)
+// : [B] "r" (B),
+// [j] "r" (size_t(i))
+// : );
+
if (AN>0)
{
-// int aligned0 = (iN0 % PacketSize);
- int aligned1 = (iN1 % PacketSize);
-
- if (aligned1==0)
- {
- for (int j = 0;j<AN;j+=PacketSize)
- {
- ei_pstore(&X[j],
- ei_padd(ei_pload(&X[j]),
- ei_padd(
- ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_pload(&A[j+iN1]))),
- ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_pload(&A[j+iN3]))) )));
- }
- }
- else if (aligned1==2)
- {
- for (int j = 0;j<AN;j+=PacketSize)
- {
- ei_pstore(&X[j],
- ei_padd(ei_pload(&X[j]),
- ei_padd(
- ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
- ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
- }
- }
- else
- {
- for (int j = 0;j<ANP;j+=4*PacketSize)
+// for (size_t j = 0;j<ANP;j+=8)
+// {
+// asm(
+//
+// "movaps (%[A0],%[j],4), %%xmm8 \n\t"
+// "movaps 16(%[A0],%[j],4), %%xmm12 \n\t"
+// "movups (%[A3],%[j],4), %%xmm11 \n\t"
+// "movups 16(%[A3],%[j],4), %%xmm15 \n\t"
+// "movups (%[A2],%[j],4), %%xmm10 \n\t"
+// "movups 16(%[A2],%[j],4), %%xmm14 \n\t"
+// "movups (%[A1],%[j],4), %%xmm9 \n\t"
+// "movups 16(%[A1],%[j],4), %%xmm13 \n\t"
+//
+// "mulps %[ptmp0], %%xmm8 \n\t"
+// "addps (%[res0],%[j],4), %%xmm8 \n\t"
+// "mulps %[ptmp3], %%xmm11 \n\t"
+// "addps %%xmm11, %%xmm8 \n\t"
+// "mulps %[ptmp2], %%xmm10 \n\t"
+// "addps %%xmm10, %%xmm8 \n\t"
+// "mulps %[ptmp1], %%xmm9 \n\t"
+// "addps %%xmm9, %%xmm8 \n\t"
+// "movaps %%xmm8, (%[res0],%[j],4) \n\t"
+//
+// "mulps %[ptmp0], %%xmm12 \n\t"
+// "addps 16(%[res0],%[j],4), %%xmm12 \n\t"
+// "mulps %[ptmp3], %%xmm15 \n\t"
+// "addps %%xmm15, %%xmm12 \n\t"
+// "mulps %[ptmp2], %%xmm14 \n\t"
+// "addps %%xmm14, %%xmm12 \n\t"
+// "mulps %[ptmp1], %%xmm13 \n\t"
+// "addps %%xmm13, %%xmm12 \n\t"
+// "movaps %%xmm12, 16(%[res0],%[j],4) \n\t"
+// :
+// : [res0] "r" (X), [j] "r" (j),[A0] "r" (A0),
+// [A1] "r" (A1),
+// [A2] "r" (A2),
+// [A3] "r" (A3),
+// [ptmp0] "x" (ptmp0),
+// [ptmp1] "x" (ptmp1),
+// [ptmp2] "x" (ptmp2),
+// [ptmp3] "x" (ptmp3)
+// : "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "%r14");
+// }
+ register Packet A00;
+ register Packet A01;
+ register Packet A02;
+ register Packet A03;
+ register Packet A10;
+ register Packet A11;
+ register Packet A12;
+ register Packet A13;
+ for (int j = 0;j<ANP;j+=2*PacketSize)
{
+// A00 = ei_pload(&A0[j]);
+// A01 = ei_ploadu(&A1[j]);
+// A02 = ei_ploadu(&A2[j]);
+// A03 = ei_ploadu(&A3[j]);
+// A10 = ei_pload(&A0[j+PacketSize]);
+// A11 = ei_ploadu(&A1[j+PacketSize]);
+// A12 = ei_ploadu(&A2[j+PacketSize]);
+// A13 = ei_ploadu(&A3[j+PacketSize]);
+//
+// A00 = ei_pmul(ptmp0, A00);
+// A01 = ei_pmul(ptmp1, A01);
+// A02 = ei_pmul(ptmp2, A02);
+// A03 = ei_pmul(ptmp3, A03);
+// A10 = ei_pmul(ptmp0, A10);
+// A11 = ei_pmul(ptmp1, A11);
+// A12 = ei_pmul(ptmp2, A12);
+// A13 = ei_pmul(ptmp3, A13);
+//
+// A00 = ei_padd(A00,A01);
+// A02 = ei_padd(A02,A03);
+// A00 = ei_padd(A00,ei_pload(&X[j]));
+// A00 = ei_padd(A00,A02);
+// ei_pstore(&X[j],A00);
+//
+// A10 = ei_padd(A10,A11);
+// A12 = ei_padd(A12,A13);
+// A10 = ei_padd(A10,ei_pload(&X[j+PacketSize]));
+// A10 = ei_padd(A10,A12);
+// ei_pstore(&X[j+PacketSize],A10);
+
ei_pstore(&X[j],
ei_padd(ei_pload(&X[j]),
ei_padd(
- ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
- ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
+ ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j])),ei_pmul(ptmp1,ei_ploadu(&A1[j]))),
+ ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j])),ei_pmul(ptmp3,ei_ploadu(&A3[j]))) )));
ei_pstore(&X[j+PacketSize],
ei_padd(ei_pload(&X[j+PacketSize]),
ei_padd(
- ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+PacketSize+iN1]))),
- ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+PacketSize+iN3]))) )));
-
- ei_pstore(&X[j+2*PacketSize],
- ei_padd(ei_pload(&X[j+2*PacketSize]),
- ei_padd(
- ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+2*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+2*PacketSize+iN1]))),
- ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+2*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+2*PacketSize+iN3]))) )));
-
- ei_pstore(&X[j+3*PacketSize],
- ei_padd(ei_pload(&X[j+3*PacketSize]),
- ei_padd(
- ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+3*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+3*PacketSize+iN1]))),
- ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+3*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+3*PacketSize+iN3]))) )));
-
+ ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j+PacketSize])),ei_pmul(ptmp1,ei_ploadu(&A1[j+PacketSize]))),
+ ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j+PacketSize])),ei_pmul(ptmp3,ei_ploadu(&A3[j+PacketSize]))) )));
}
for (int j = ANP;j<AN;j+=PacketSize)
ei_pstore(&X[j],
ei_padd(ei_pload(&X[j]),
ei_padd(
- ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
- ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
- }
+ ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j])),ei_pmul(ptmp1,ei_ploadu(&A1[j]))),
+ ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j])),ei_pmul(ptmp3,ei_ploadu(&A3[j]))) )));
}
// process remaining scalars
for (int j=AN;j<N;j++)
- X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1];
+ X[j] += ei_pfirst(ptmp0) * A0[j] + ei_pfirst(ptmp1) * A1[j] + ei_pfirst(ptmp2) * A2[j] + ei_pfirst(ptmp3) * A3[j];
}
for (int i=bound;i<N;i++)
{
@@ -185,6 +241,119 @@ static inline void matrix_vector_product(const gene_matrix & A, const gene_vecto
// {
// asm("#begin matrix_vector_product");
// int AN = (N/PacketSize)*PacketSize;
+// int ANP = (AN/(2*PacketSize))*2*PacketSize;
+// int bound = (N/4)*4;
+// for (int i=0;i<N;i++)
+// X[i] = 0;
+//
+// for (int i=0;i<bound;i+=4)
+// {
+// real tmp0 = B[i];
+// Packet ptmp0 = ei_pset1(tmp0);
+// real tmp1 = B[i+1];
+// Packet ptmp1 = ei_pset1(tmp1);
+// real tmp2 = B[i+2];
+// Packet ptmp2 = ei_pset1(tmp2);
+// real tmp3 = B[i+3];
+// Packet ptmp3 = ei_pset1(tmp3);
+// int iN0 = i*N;
+// int iN1 = (i+1)*N;
+// int iN2 = (i+2)*N;
+// int iN3 = (i+3)*N;
+// if (AN>0)
+// {
+// // int aligned0 = (iN0 % PacketSize);
+// int aligned1 = (iN1 % PacketSize);
+//
+// if (aligned1==0)
+// {
+// for (int j = 0;j<AN;j+=PacketSize)
+// {
+// ei_pstore(&X[j],
+// ei_padd(ei_pload(&X[j]),
+// ei_padd(
+// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_pload(&A[j+iN1]))),
+// ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_pload(&A[j+iN3]))) )));
+// }
+// }
+// else if (aligned1==2)
+// {
+// for (int j = 0;j<AN;j+=PacketSize)
+// {
+// ei_pstore(&X[j],
+// ei_padd(ei_pload(&X[j]),
+// ei_padd(
+// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
+// ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
+// }
+// }
+// else
+// {
+// for (int j = 0;j<ANP;j+=2*PacketSize)
+// {
+// ei_pstore(&X[j],
+// ei_padd(ei_pload(&X[j]),
+// ei_padd(
+// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
+// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
+//
+// ei_pstore(&X[j+PacketSize],
+// ei_padd(ei_pload(&X[j+PacketSize]),
+// ei_padd(
+// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+PacketSize+iN1]))),
+// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+PacketSize+iN3]))) )));
+//
+// // ei_pstore(&X[j+2*PacketSize],
+// // ei_padd(ei_pload(&X[j+2*PacketSize]),
+// // ei_padd(
+// // ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+2*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+2*PacketSize+iN1]))),
+// // ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+2*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+2*PacketSize+iN3]))) )));
+// //
+// // ei_pstore(&X[j+3*PacketSize],
+// // ei_padd(ei_pload(&X[j+3*PacketSize]),
+// // ei_padd(
+// // ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+3*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+3*PacketSize+iN1]))),
+// // ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+3*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+3*PacketSize+iN3]))) )));
+//
+// }
+// for (int j = ANP;j<AN;j+=PacketSize)
+// ei_pstore(&X[j],
+// ei_padd(ei_pload(&X[j]),
+// ei_padd(
+// ei_padd(ei_pmul(ptmp0,ei_ploadu(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
+// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
+// }
+// }
+// // process remaining scalars
+// for (int j=AN;j<N;j++)
+// X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1] + tmp2 * A[j+iN2] + tmp3 * A[j+iN3];
+// }
+// for (int i=bound;i<N;i++)
+// {
+// real tmp0 = B[i];
+// Packet ptmp0 = ei_pset1(tmp0);
+// int iN0 = i*N;
+// if (AN>0)
+// {
+// bool aligned0 = (iN0 % PacketSize) == 0;
+// if (aligned0)
+// for (int j = 0;j<AN;j+=PacketSize)
+// ei_pstore(&X[j], ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pload(&X[j])));
+// else
+// for (int j = 0;j<AN;j+=PacketSize)
+// ei_pstore(&X[j], ei_padd(ei_pmul(ptmp0,ei_ploadu(&A[j+iN0])),ei_pload(&X[j])));
+// }
+// // process remaining scalars
+// for (int j=AN;j<N;j++)
+// X[j] += tmp0 * A[j+iN0];
+// }
+// asm("#end matrix_vector_product");
+// }
+
+// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
+// {
+// asm("#begin matrix_vector_product");
+// int AN = (N/PacketSize)*PacketSize;
// for (int i=0;i<N;i++)
// X[i] = 0;
//
diff --git a/bench/btl/libs/mtl4/main.cpp b/bench/btl/libs/mtl4/main.cpp
index 8cf1f6fa9..94c81beac 100644
--- a/bench/btl/libs/mtl4/main.cpp
+++ b/bench/btl/libs/mtl4/main.cpp
@@ -18,13 +18,8 @@
#include "utilities.h"
#include "mtl4_interface.hh"
#include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_axpy.hh"
-#include "action_lu_solve.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-#include "action_atv_product.hh"
+#include "basic_actions.hh"
+#include "action_cholesky.hh"
BTL_MAIN;
@@ -32,12 +27,17 @@ int main()
{
bench<Action_axpy<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+ bench<Action_axpby<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+
bench<Action_matrix_vector_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ bench<Action_trisolve<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ bench<Action_cholesky<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
return 0;
}
diff --git a/bench/btl/libs/mtl4/mtl4_interface.hh b/bench/btl/libs/mtl4/mtl4_interface.hh
index 5beb936db..a299fa83e 100644
--- a/bench/btl/libs/mtl4/mtl4_interface.hh
+++ b/bench/btl/libs/mtl4/mtl4_interface.hh
@@ -19,6 +19,7 @@
#define MTL4_INTERFACE_HH
#include <boost/numeric/mtl/mtl.hpp>
+#include <boost/numeric/mtl/operation/cholesky.hpp>
#include <vector>
using namespace mtl;
@@ -81,6 +82,9 @@ public :
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
X = (A*B);
+// morton_dense<double, doppled_64_row_mask> C(N,N);
+// C = B;
+// X = (A*C);
}
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
@@ -88,11 +92,11 @@ public :
}
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
-// X = (trans(A)*A);
+ X = (trans(A)*A);
}
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
-// X = (A*trans(A));
+ X = (A*trans(A));
}
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
@@ -107,6 +111,19 @@ public :
Y += coef * X;
}
+ static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+ Y = a*X + b*Y;
+ }
+
+ static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
+ C = X;
+ recursive_cholesky(C);
+ }
+
+ static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+ X = lower_trisolve(L, B);
+ }
+
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
cible = source;
}
diff --git a/bench/btl/libs/ublas/main.cpp b/bench/btl/libs/ublas/main.cpp
index acc59364f..6d286e1a3 100644
--- a/bench/btl/libs/ublas/main.cpp
+++ b/bench/btl/libs/ublas/main.cpp
@@ -20,18 +20,15 @@
#include "utilities.h"
#include "ublas_interface.hh"
#include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_axpy.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-#include "action_atv_product.hh"
+#include "basic_actions.hh"
+#include "basic_actions.hh"
BTL_MAIN;
int main()
{
bench<Action_axpy<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+ bench<Action_axpby<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
@@ -40,6 +37,8 @@ int main()
bench<Action_ata_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ bench<Action_trisolve<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
return 0;
}
diff --git a/bench/btl/libs/ublas/ublas_interface.hh b/bench/btl/libs/ublas/ublas_interface.hh
index 2572f8c21..95cad5195 100644
--- a/bench/btl/libs/ublas/ublas_interface.hh
+++ b/bench/btl/libs/ublas/ublas_interface.hh
@@ -23,7 +23,9 @@
#include <boost/numeric/ublas/vector.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/io.hpp>
+#include <boost/numeric/ublas/triangular.hpp>
+using namespace boost::numeric;
template <class real>
class ublas_interface{
@@ -116,6 +118,10 @@ public :
Y.plus_assign(coef*X);
}
+ static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+ Y = a*X + b*Y;
+ }
+
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
// X = prod(trans(A),A);
X.assign(prod(trans(A),A));
@@ -126,6 +132,10 @@ public :
X.assign(prod(A,trans(A)));
}
+ static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+ X = solve(L, B, ublas::lower_tag ());
+ }
+
};
#endif