diff options
Diffstat (limited to 'bench/btl/libs/C_BLAS/C_BLAS_interface.hh')
-rw-r--r-- | bench/btl/libs/C_BLAS/C_BLAS_interface.hh | 74 |
1 files changed, 50 insertions, 24 deletions
diff --git a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh index eadbf0dd2..49ae90f3d 100644 --- a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh +++ b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh @@ -25,6 +25,11 @@ extern "C" { #include "cblas.h" +#ifdef HAS_LAPACK + // Cholesky Factorization + void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info); + void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info); +#endif } #define MAKE_STRING2(S) #S @@ -53,31 +58,31 @@ public : cblas_dgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1); } - static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) - { + static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); } - static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) - { + static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ cblas_dgemm(CblasColMajor,CblasTrans,CblasTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); } - static inline void ata_product(gene_matrix & A, gene_matrix & X, int N) - { + static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); } - static inline void aat_product(gene_matrix & A, gene_matrix & X, int N) - { + static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){ cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); } - static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N) - { + static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){ cblas_daxpy(N,coef,X,1,Y,1); } + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + cblas_dscal(N,b,Y,1); + cblas_daxpy(N,a,X,1,Y,1); + } + }; template<> @@ -90,41 +95,62 @@ public : return MAKE_STRING(CBLASNAME); } - static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) - { + static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ cblas_sgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1); } - static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) - { + static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ cblas_sgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1); } - static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) - { + static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); } - static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) - { + static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); } - static inline void ata_product(gene_matrix & A, gene_matrix & X, int N) - { + static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ cblas_sgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); } - static inline void aat_product(gene_matrix & A, gene_matrix & X, int N) - { + static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){ cblas_sgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); } - static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N) - { + static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N){ cblas_saxpy(N,coef,X,1,Y,1); } + static inline void axpby(float a, const gene_vector & X, float b, gene_vector & Y, int N){ + cblas_sscal(N,b,Y,1); + cblas_saxpy(N,a,X,1,Y,1); + } + + #ifdef HAS_LAPACK + static inline void cholesky(const gene_vector & X, gene_vector & C, int N){ + cblas_scopy(N*N, X, 1, C, 1); + char uplo = 'L'; + int info = 0; + spotrf_(&uplo, &N, C, &N, &info); +// float tmp[N]; +// for (int j = 1; j < N; ++j) +// { +// int endSize = N-j-1; +// if (endSize>0) { + //cblas_sgemv(CblasColMajor, CblasNoTrans, N-j-1, j, ATL_rnone, A+j+1, lda, A+j, lda, ATL_rone, Ac+j+1, 1); +// cblas_sgemv(CblasColMajor, CblasNoTrans,endSize,j, 1.0, &(C[j+1]),N, &(C[j]),N, 0.0, &(C[j+1+N*j]),1); +// } +// } + } + #endif + + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ + cblas_scopy(N, B, 1, X, 1); + cblas_strsv(CblasColMajor, CblasLower, CblasNoTrans, CblasNonUnit, N, L, N, X, 1); + } + }; |