diff options
author | Gael Guennebaud <g.gael@free.fr> | 2010-03-05 11:35:43 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2010-03-05 11:35:43 +0100 |
commit | c44220835864d2994a2a2b030100125fa00aa378 (patch) | |
tree | 4d16beeda69b216d1c5d70eb359d61c6aa3506f8 | |
parent | 5f172cd01f8e4adb790977b4ea5b9da660ca790e (diff) |
clean a bit the bench_gemm files
-rw-r--r-- | bench/bench_gemm.cpp | 9 | ||||
-rw-r--r-- | bench/bench_gemm_blas.cpp | 109 |
2 files changed, 3 insertions, 115 deletions
diff --git a/bench/bench_gemm.cpp b/bench/bench_gemm.cpp index 3cb75c17a..5c55d4b7c 100644 --- a/bench/bench_gemm.cpp +++ b/bench/bench_gemm.cpp @@ -20,11 +20,6 @@ typedef Matrix<Scalar,Dynamic,Dynamic> M; extern "C" { #include <bench/btl/libs/C_BLAS/blas.h> - - void sgemm_kernel(int actual_mc, int cols, int actual_kc, float alpha, - float* blockA, float* blockB, float* res, int resStride); - void sgemm_oncopy(int actual_kc, int cols, const float* rhs, int rhsStride, float* blockB); - void sgemm_itcopy(int actual_kc, int cols, const float* rhs, int rhsStride, float* blockB); } static float fone = 1; @@ -72,7 +67,9 @@ int main(int argc, char ** argv) int rep = 1; // number of repetitions per try int tries = 5; // number of tries, we keep the best - int s = 2048; + int s = argc==2 ? std::atoi(argv[1]) : 2048; + std::cout << "Matrix size = " << s << "\n"; + int m = s; int n = s; int p = s; diff --git a/bench/bench_gemm_blas.cpp b/bench/bench_gemm_blas.cpp deleted file mode 100644 index 254302312..000000000 --- a/bench/bench_gemm_blas.cpp +++ /dev/null @@ -1,109 +0,0 @@ - -#include <Eigen/Core> -#include <bench/BenchTimer.h> - -extern "C" -{ - #include <bench/btl/libs/C_BLAS/blas.h> - #include <cblas.h> - - void sgemm_kernel(int actual_mc, int cols, int actual_kc, float alpha, - float* blockA, float* blockB, float* res, int resStride); - - void sgemm_otcopy(int actual_kc, int cols, const float* rhs, int rhsStride, float* blockB); - void sgemm_oncopy(int actual_kc, int cols, const float* rhs, int rhsStride, float* blockB); - void sgemm_itcopy(int actual_kc, int cols, const float* rhs, int rhsStride, float* blockB); - void sgemm_incopy(int actual_kc, int cols, const float* rhs, int rhsStride, float* blockB); -} - -using namespace std; -using namespace Eigen; - -#ifndef SCALAR -#define SCALAR float -#endif - -typedef SCALAR Scalar; -typedef Matrix<Scalar,Dynamic,Dynamic> M; - -static float fone = 1; -static float fzero = 0; -static double done = 1; -static double szero = 
0; -static char notrans = 'N'; -static char trans = 'T'; -static char nonunit = 'N'; -static char lower = 'L'; -static char right = 'R'; -static int intone = 1; - -void blas_gemm(const MatrixXf& a, const MatrixXf& b, MatrixXf& c) -{ - int M = c.rows(); - int N = c.cols(); - int K = a.cols(); - - int lda = a.rows(); - int ldb = b.rows(); - int ldc = c.rows(); - -// c.noalias() += a * b; - sgemm_(&notrans,&notrans,&M,&N,&K,&fone, - const_cast<float*>(a.data()),&lda, - const_cast<float*>(b.data()),&ldb,&fone, - c.data(),&ldc); -} - -void blas_gemm(const MatrixXd& a, const MatrixXd& b, MatrixXd& c) -{ - int M = c.rows(); - int N = c.cols(); - int K = a.cols(); - - int lda = a.rows(); - int ldb = b.rows(); - int ldc = c.rows(); - -// c.noalias() += a * b; - - dgemm_(&notrans,&notrans,&M,&N,&K,&done, - const_cast<double*>(a.data()),&lda, - const_cast<double*>(b.data()),&ldb,&done, - c.data(),&ldc); -} - -int main(int argc, char **argv) -{ - int rep = 1; - int s = 2048; - int m = s; - int n = s; - int p = s; - const int N = 1; - M a[N]; - M b[N]; - M c[N]; - - for (int k=0; k<N; ++k) - { - a[k].resize(m,p); a[k].setOnes(); - b[k].resize(p,n); b[k].setOnes(); - c[k].resize(m,n); c[k].setOnes(); - } - - BenchTimer t; - - BENCH(t, 5, rep, - for(int k=0;k<N;++k) - blas_gemm(a[k],b[k],c[k])); - -// BENCH(t, 5, rep, -// _Pragma("omp parallel for schedule(static,1)") -// for(int k=0;k<N;++k) -// blas_gemm(a[k],b[k],c[k])); - - std::cerr << "cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*N*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; - std::cerr << "real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*N*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; - return 0; -} - |