diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-04-11 17:20:17 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-04-11 17:20:17 -0700 |
commit | d6e596174d09446236b3f398d8ec39148c638ed9 (patch) | |
tree | ccb4116b05dc11d7931bac0129fd1394abe1e0b0 /blas | |
parent | 3ca1ae2bb761d7738bcdad885639f422a6b7c914 (diff) | |
parent | 833efb39bfe4957934982112fe435ab30a0c3b4f (diff) |
Pull latest updates from upstream
Diffstat (limited to 'blas')
-rw-r--r-- | blas/common.h | 29 | ||||
-rw-r--r-- | blas/level1_impl.h | 6 | ||||
-rw-r--r-- | blas/level2_cplx_impl.h | 110 | ||||
-rw-r--r-- | blas/level2_impl.h | 342 | ||||
-rw-r--r-- | blas/level2_real_impl.h | 162 | ||||
-rw-r--r-- | blas/level3_impl.h | 428 |
6 files changed, 533 insertions, 544 deletions
diff --git a/blas/common.h b/blas/common.h index 5ecb153e2..61d8344d9 100644 --- a/blas/common.h +++ b/blas/common.h @@ -10,8 +10,8 @@ #ifndef EIGEN_BLAS_COMMON_H #define EIGEN_BLAS_COMMON_H -#include <Eigen/Core> -#include <Eigen/Jacobi> +#include "../Eigen/Core" +#include "../Eigen/Jacobi" #include <complex> @@ -19,8 +19,7 @@ #error the token SCALAR must be defined to compile this file #endif -#include <Eigen/src/misc/blas.h> - +#include "../Eigen/src/misc/blas.h" #define NOTR 0 #define TR 1 @@ -94,6 +93,7 @@ enum typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> PlainMatrixType; typedef Map<Matrix<Scalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > MatrixType; +typedef Map<const Matrix<Scalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > ConstMatrixType; typedef Map<Matrix<Scalar,Dynamic,1>, 0, InnerStride<Dynamic> > StridedVectorType; typedef Map<Matrix<Scalar,Dynamic,1> > CompactVectorType; @@ -105,24 +105,43 @@ matrix(T* data, int rows, int cols, int stride) } template<typename T> +Map<const Matrix<T,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > +matrix(const T* data, int rows, int cols, int stride) +{ + return Map<const Matrix<T,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> >(data, rows, cols, OuterStride<>(stride)); +} + +template<typename T> Map<Matrix<T,Dynamic,1>, 0, InnerStride<Dynamic> > make_vector(T* data, int size, int incr) { return Map<Matrix<T,Dynamic,1>, 0, InnerStride<Dynamic> >(data, size, InnerStride<Dynamic>(incr)); } template<typename T> +Map<const Matrix<T,Dynamic,1>, 0, InnerStride<Dynamic> > make_vector(const T* data, int size, int incr) +{ + return Map<const Matrix<T,Dynamic,1>, 0, InnerStride<Dynamic> >(data, size, InnerStride<Dynamic>(incr)); +} + +template<typename T> Map<Matrix<T,Dynamic,1> > make_vector(T* data, int size) { return Map<Matrix<T,Dynamic,1> >(data, size); } template<typename T> +Map<const Matrix<T,Dynamic,1> > make_vector(const T* data, int size) +{ + return Map<const Matrix<T,Dynamic,1> >(data, size); +} + +template<typename T> T* get_compact_vector(T* x, int n, int incx) { if(incx==1) return x; - T* ret = new Scalar[n]; + typename Eigen::internal::remove_const<T>::type* ret = new Scalar[n]; if(incx<0) make_vector(ret,n) = make_vector(x,n,-incx).reverse(); else make_vector(ret,n) = make_vector(x,n, incx); return ret; diff --git a/blas/level1_impl.h b/blas/level1_impl.h index e623bd178..f857bfa20 100644 --- a/blas/level1_impl.h +++ b/blas/level1_impl.h @@ -9,11 +9,11 @@ #include "common.h" -int EIGEN_BLAS_FUNC(axpy)(int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy) +int EIGEN_BLAS_FUNC(axpy)(const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *py, const int *incy) { - Scalar* x = reinterpret_cast<Scalar*>(px); + const Scalar* x = reinterpret_cast<const Scalar*>(px); Scalar* y = reinterpret_cast<Scalar*>(py); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); if(*n<=0) return 0; diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 9b845de22..e3ce61435 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -16,28 +16,22 @@ * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n hermitian matrix. */ -int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) +int EIGEN_BLAS_FUNC(hemv)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, + const RealScalar *px, const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) { typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run); - func[LO] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run); - - init = true; - } - - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* x = reinterpret_cast<Scalar*>(px); + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run), + // array index: LO + (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run), + }; + + const Scalar* a = reinterpret_cast<const Scalar*>(pa); + const Scalar* x = reinterpret_cast<const Scalar*>(px); Scalar* y = reinterpret_cast<Scalar*>(py); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); - Scalar beta = *reinterpret_cast<Scalar*>(pbeta); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); + Scalar beta = *reinterpret_cast<const Scalar*>(pbeta); // check arguments int info = 0; @@ -52,7 +46,7 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa if(*n==0) return 1; - Scalar* actual_x = get_compact_vector(x,*n,*incx); + const Scalar* actual_x = get_compact_vector(x,*n,*incx); Scalar* actual_y = get_compact_vector(y,*n,*incy); if(beta!=Scalar(1)) @@ -111,19 +105,12 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, RealScalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run); - func[LO] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run), + // array index: LO + (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run), + }; Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* ap = reinterpret_cast<Scalar*>(pap); @@ -162,19 +149,12 @@ int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::packed_rank2_update_selector<Scalar,int,Upper>::run); - func[LO] = (internal::packed_rank2_update_selector<Scalar,int,Lower>::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::packed_rank2_update_selector<Scalar,int,Upper>::run), + // array index: LO + (internal::packed_rank2_update_selector<Scalar,int,Lower>::run), + }; Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* y = reinterpret_cast<Scalar*>(py); @@ -217,19 +197,12 @@ int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) { typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run); - func[LO] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run), + // array index: LO + (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run), + }; Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* a = reinterpret_cast<Scalar*>(pa); @@ -271,19 +244,12 @@ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) { typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::rank2_update_selector<Scalar,int,Upper>::run); - func[LO] = (internal::rank2_update_selector<Scalar,int,Lower>::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::rank2_update_selector<Scalar,int,Upper>::run), + // array index: LO + (internal::rank2_update_selector<Scalar,int,Lower>::run), + }; Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* y = reinterpret_cast<Scalar*>(py); diff --git a/blas/level2_impl.h b/blas/level2_impl.h index 917f2e372..173f40b44 100644 --- a/blas/level2_impl.h +++ b/blas/level2_impl.h @@ -23,29 +23,25 @@ struct general_matrix_vector_product_wrapper } }; -int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *incb, RealScalar *pbeta, RealScalar *pc, int *incc) +int EIGEN_BLAS_FUNC(gemv)(const char *opa, const int *m, const int *n, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *incb, const RealScalar *pbeta, RealScalar *pc, const int *incc) { typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int , Scalar *, int, Scalar); - static functype func[4]; - - static bool init = false; - if(!init) - { - for(int k=0; k<4; ++k) - func[k] = 0; - - func[NOTR] = (general_matrix_vector_product_wrapper<int,Scalar,ColMajor,false,false>::run); - func[TR ] = (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,false,false>::run); - func[ADJ ] = (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,Conj ,false>::run); - - init = true; - } - - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* b = reinterpret_cast<Scalar*>(pb); + static const functype func[4] = { + // array index: NOTR + (general_matrix_vector_product_wrapper<int,Scalar,ColMajor,false,false>::run), + // array index: TR + (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,false,false>::run), + // array index: ADJ + (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,Conj ,false>::run), + 0 + }; + + const Scalar* a = reinterpret_cast<const Scalar*>(pa); + const Scalar* b = reinterpret_cast<const Scalar*>(pb); Scalar* c = reinterpret_cast<Scalar*>(pc); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); - Scalar beta = *reinterpret_cast<Scalar*>(pbeta); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); + Scalar beta = *reinterpret_cast<const Scalar*>(pbeta); // check arguments int info = 0; @@ -67,7 +63,7 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca if(code!=NOTR) std::swap(actual_m,actual_n); - Scalar* actual_b = get_compact_vector(b,actual_n,*incb); + const Scalar* actual_b = get_compact_vector(b,actual_n,*incb); Scalar* actual_c = get_compact_vector(c,actual_m,*incc); if(beta!=Scalar(1)) @@ -87,37 +83,41 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca return 1; } -int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) +int EIGEN_BLAS_FUNC(trsv)(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda, RealScalar *pb, const int *incb) { typedef void (*functype)(int, const Scalar *, int, Scalar *); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run); - - init = true; - } - - Scalar* a = reinterpret_cast<Scalar*>(pa); + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run), + 0 + }; + + const Scalar* a = reinterpret_cast<const Scalar*>(pa); Scalar* b = reinterpret_cast<Scalar*>(pb); int info = 0; @@ -142,37 +142,41 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar -int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) +int EIGEN_BLAS_FUNC(trmv)(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda, RealScalar *pb, const int *incb) { typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar&); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run); - - init = true; - } - - Scalar* a = reinterpret_cast<Scalar*>(pa); + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run), + 0 + }; + + const Scalar* a = reinterpret_cast<const Scalar*>(pa); Scalar* b = reinterpret_cast<Scalar*>(pb); int info = 0; @@ -214,11 +218,11 @@ int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* x = reinterpret_cast<Scalar*>(px); + const Scalar* a = reinterpret_cast<const Scalar*>(pa); + const Scalar* x = reinterpret_cast<const Scalar*>(px); Scalar* y = reinterpret_cast<Scalar*>(py); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); - Scalar beta = *reinterpret_cast<Scalar*>(pbeta); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); + Scalar beta = *reinterpret_cast<const Scalar*>(pbeta); int coeff_rows = *kl+*ku+1; int info = 0; @@ -241,7 +245,7 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca if(OP(*trans)!=NOTR) std::swap(actual_m,actual_n); - Scalar* actual_x = get_compact_vector(x,actual_n,*incx); + const Scalar* actual_x = get_compact_vector(x,actual_n,*incx); Scalar* actual_y = get_compact_vector(y,actual_m,*incy); if(beta!=Scalar(1)) @@ -250,7 +254,7 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca else make_vector(actual_y, actual_m) *= beta; } - MatrixType mat_coeffs(a,coeff_rows,*n,*lda); + ConstMatrixType mat_coeffs(a,coeff_rows,*n,*lda); int nb = std::min(*n,(*m)+(*ku)); for(int j=0; j<nb; ++j) @@ -346,32 +350,36 @@ int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, Rea int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) { typedef void (*functype)(int, int, const Scalar *, int, Scalar *); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int i=0; i<16; ++i) - func[i] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,ColMajor>::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,RowMajor>::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,Conj, Scalar,RowMajor>::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,ColMajor>::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,RowMajor>::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,Conj, Scalar,RowMajor>::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,ColMajor>::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,RowMajor>::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,ColMajor>::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,RowMajor>::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,ColMajor>::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,RowMajor>::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector<int,Lower|0, Scalar,Conj, Scalar,RowMajor>::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,ColMajor>::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,RowMajor>::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector<int,Upper|0, Scalar,Conj, Scalar,RowMajor>::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,ColMajor>::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,RowMajor>::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,ColMajor>::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,RowMajor>::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run), + 0, + }; Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* x = reinterpret_cast<Scalar*>(px); @@ -416,32 +424,36 @@ int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, Real int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) { typedef void (*functype)(int, const Scalar*, const Scalar*, Scalar*, Scalar); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run), + 0 + }; Scalar* ap = reinterpret_cast<Scalar*>(pap); Scalar* x = reinterpret_cast<Scalar*>(px); @@ -487,32 +499,36 @@ int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) { typedef void (*functype)(int, const Scalar*, Scalar*); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run), + 0 + }; Scalar* ap = reinterpret_cast<Scalar*>(pap); Scalar* x = reinterpret_cast<Scalar*>(px); diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index cac89b268..7620f0a38 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -10,28 +10,22 @@ #include "common.h" // y = alpha*A*x + beta*y -int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) +int EIGEN_BLAS_FUNC(symv) (const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, + const RealScalar *px, const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) { typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run); - func[LO] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run); - - init = true; - } - - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* x = reinterpret_cast<Scalar*>(px); + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run), + // array index: LO + (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run), + }; + + const Scalar* a = reinterpret_cast<const Scalar*>(pa); + const Scalar* x = reinterpret_cast<const Scalar*>(px); Scalar* y = reinterpret_cast<Scalar*>(py); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); - Scalar beta = *reinterpret_cast<Scalar*>(pbeta); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); + Scalar beta = *reinterpret_cast<const Scalar*>(pbeta); // check arguments int info = 0; @@ -46,7 +40,7 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p if(*n==0) return 0; - Scalar* actual_x = get_compact_vector(x,*n,*incx); + const Scalar* actual_x = get_compact_vector(x,*n,*incx); Scalar* actual_y = get_compact_vector(y,*n,*incy); if(beta!=Scalar(1)) @@ -68,41 +62,20 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p } // C := alpha*x*x' + C -int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syr)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *pc, const int *ldc) { -// typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar); -// static functype func[2]; - -// static bool init = false; -// if(!init) -// { -// for(int k=0; k<2; ++k) -// func[k] = 0; -// -// func[UP] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run); -// func[LO] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run); - -// init = true; -// } typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run); - func[LO] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run); - - init = true; - } - - Scalar* x = reinterpret_cast<Scalar*>(px); + static const functype func[2] = { + // array index: UP + (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run), + // array index: LO + (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run), + }; + + const Scalar* x = reinterpret_cast<const Scalar*>(px); Scalar* c = reinterpret_cast<Scalar*>(pc); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); int info = 0; if(UPLO(*uplo)==INVALID) info = 1; @@ -115,7 +88,7 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, if(*n==0 || alpha==Scalar(0)) return 1; // if the increment is not 1, let's copy it to a temporary vector to enable vectorization - Scalar* x_cpy = get_compact_vector(x,*n,*incx); + const Scalar* x_cpy = get_compact_vector(x,*n,*incx); int code = UPLO(*uplo); if(code>=2 || func[code]==0) @@ -129,41 +102,20 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, } // C := alpha*x*y' + alpha*y*x' + C -int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syr2)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, const RealScalar *py, const int *incy, RealScalar *pc, const int *ldc) { -// typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); -// static functype func[2]; -// -// static bool init = false; -// if(!init) -// { -// for(int k=0; k<2; ++k) -// func[k] = 0; -// -// func[UP] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run); -// func[LO] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run); -// -// init = true; -// } typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::rank2_update_selector<Scalar,int,Upper>::run); - func[LO] = (internal::rank2_update_selector<Scalar,int,Lower>::run); - - init = true; - } - - Scalar* x = reinterpret_cast<Scalar*>(px); - Scalar* y = reinterpret_cast<Scalar*>(py); + static const functype func[2] = { + // array index: UP + (internal::rank2_update_selector<Scalar,int,Upper>::run), + // array index: LO + (internal::rank2_update_selector<Scalar,int,Lower>::run), + }; + + const Scalar* x = reinterpret_cast<const Scalar*>(px); + const Scalar* y = reinterpret_cast<const Scalar*>(py); Scalar* c = reinterpret_cast<Scalar*>(pc); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); int info = 0; if(UPLO(*uplo)==INVALID) info = 1; @@ -177,8 +129,8 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px if(alpha==Scalar(0)) return 1; - Scalar* x_cpy = get_compact_vector(x,*n,*incx); - Scalar* y_cpy = get_compact_vector(y,*n,*incy); + const Scalar* x_cpy = get_compact_vector(x,*n,*incx); + const Scalar* y_cpy = get_compact_vector(y,*n,*incy); int code = UPLO(*uplo); if(code>=2 || func[code]==0) @@ -234,19 +186,12 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,false>::run); - func[LO] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,false>::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,false>::run), + // array index: LO + (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,false>::run), + }; Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* ap = reinterpret_cast<Scalar*>(pap); @@ -285,19 +230,12 @@ int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *in int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::packed_rank2_update_selector<Scalar,int,Upper>::run); - func[LO] = (internal::packed_rank2_update_selector<Scalar,int,Lower>::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::packed_rank2_update_selector<Scalar,int,Upper>::run), + // array index: LO + (internal::packed_rank2_update_selector<Scalar,int,Lower>::run), + }; Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* y = reinterpret_cast<Scalar*>(py); diff --git a/blas/level3_impl.h b/blas/level3_impl.h index 6a6b00728..beb36c47d 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h @@ -9,34 +9,40 @@ #include <iostream> #include "common.h" -int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(gemm)(const char *opa, const char *opb, const int *m, const int *n, const int *k, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking<Scalar,Scalar>&, Eigen::internal::GemmParallelInfo<DenseIndex>*); - static functype func[12]; - - static bool init = false; - if(!init) - { - for(int i=0; i<12; ++i) - func[i] = 0; - func[NOTR | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,ColMajor,false,ColMajor>::run); - func[TR | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,false,ColMajor>::run); - func[ADJ | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor>::run); - func[NOTR | (TR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,false,ColMajor>::run); - func[TR | (TR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,false,ColMajor>::run); - func[ADJ | (TR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,false,ColMajor>::run); - func[NOTR | (ADJ << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor>::run); - func[TR | (ADJ << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,Conj, ColMajor>::run); - func[ADJ | (ADJ << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,Conj, ColMajor>::run); - init = true; - } - - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* b = reinterpret_cast<Scalar*>(pb); + static const functype func[12] = { + // array index: NOTR | (NOTR << 2) + (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,ColMajor,false,ColMajor>::run), + // array index: TR | (NOTR << 2) + (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,false,ColMajor>::run), + // array index: ADJ | (NOTR << 2) + (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor>::run), + 0, + // array index: NOTR | (TR << 2) + (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,false,ColMajor>::run), + // array index: TR | (TR << 2) + (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,false,ColMajor>::run), + // array index: ADJ | (TR << 2) + (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,false,ColMajor>::run), + 0, + // array index: NOTR | (ADJ << 2) + (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor>::run), + // array index: TR | (ADJ << 2) + (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,Conj, ColMajor>::run), + // array index: ADJ | (ADJ << 2) + (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,Conj, ColMajor>::run), + 0 + }; + + const Scalar* a = reinterpret_cast<const Scalar*>(pa); + const Scalar* b = reinterpret_cast<const Scalar*>(pb); Scalar* c = reinterpret_cast<Scalar*>(pc); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); - Scalar beta = *reinterpret_cast<Scalar*>(pbeta); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); + Scalar beta = *reinterpret_cast<const Scalar*>(pbeta); int info = 0; if(OP(*opa)==INVALID) info = 1; @@ -69,57 +75,73 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal return 0; } -int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb) +int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, const int *n, + const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) { // std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n"; typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking<Scalar,Scalar>&); - static functype func[32]; - - static bool init = false; - if(!init) - { - for(int i=0; i<32; ++i) - func[i] = 0; - - func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,ColMajor,ColMajor>::run); - func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,RowMajor,ColMajor>::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor>::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,ColMajor,ColMajor>::run); - func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,RowMajor,ColMajor>::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, Conj, RowMajor,ColMajor>::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,ColMajor,ColMajor>::run); - func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,RowMajor,ColMajor>::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor>::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,ColMajor,ColMajor>::run); - func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,RowMajor,ColMajor>::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, Conj, RowMajor,ColMajor>::run); - - - func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor>::run); - func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor>::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor>::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor>::run); - func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor>::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor>::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor>::run); - func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor>::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor>::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor>::run); - func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor>::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor>::run); - - init = true; - } - - Scalar* a = reinterpret_cast<Scalar*>(pa); + static const functype func[32] = { + // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,ColMajor,ColMajor>::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,RowMajor,ColMajor>::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor>::run),\ + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,ColMajor,ColMajor>::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,RowMajor,ColMajor>::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, Conj, RowMajor,ColMajor>::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,ColMajor,ColMajor>::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,RowMajor,ColMajor>::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor>::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,ColMajor,ColMajor>::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,RowMajor,ColMajor>::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, Conj, RowMajor,ColMajor>::run), + 0, + // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor>::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor>::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor>::run), + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor>::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor>::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor>::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor>::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor>::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor>::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor>::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor>::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor>::run), + 0 + }; + + const Scalar* a = reinterpret_cast<const Scalar*>(pa); Scalar* b = reinterpret_cast<Scalar*>(pb); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); int info = 0; if(SIDE(*side)==INVALID) info = 1; @@ -158,55 +180,73 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, // b = alpha*op(a)*b for side = 'L'or'l' // b = alpha*b*op(a) for side = 'R'or'r' -int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb) +int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, const int *n, + const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) { // std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&); - static functype func[32]; - static bool init = false; - if(!init) - { - for(int k=0; k<32; ++k) - func[k] = 0; - - func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor>::run); - func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor>::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor>::run); - func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor>::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor>::run); - func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor>::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor>::run); - func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor>::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run); - - func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run); - func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run); - func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run); - func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run); - func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run); - - init = true; - } - - Scalar* a = reinterpret_cast<Scalar*>(pa); + static const functype func[32] = { + // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor>::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor>::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run), + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor>::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor>::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor>::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor>::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor>::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor>::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run), + 0, + // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run), + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run), + 0 + }; + + const Scalar* a = reinterpret_cast<const Scalar*>(pa); Scalar* b = reinterpret_cast<Scalar*>(pb); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); int info = 0; if(SIDE(*side)==INVALID) info = 1; @@ -244,14 +284,15 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, // c = alpha*a*b + beta*c for side = 'L'or'l' // c = alpha*b*a + beta*c for side = 'R'or'r -int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(symm)(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in symm " << *side << " " << *uplo << " " << *m << "x" << *n << " lda:" << *lda << " ldb:" << *ldb << " ldc:" << *ldc << " alpha:" << *palpha << " beta:" << *pbeta << "\n"; - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* b = reinterpret_cast<Scalar*>(pb); + const Scalar* a = reinterpret_cast<const Scalar*>(pa); + const Scalar* b = reinterpret_cast<const Scalar*>(pb); Scalar* c = reinterpret_cast<Scalar*>(pc); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); - Scalar beta = *reinterpret_cast<Scalar*>(pbeta); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); + Scalar beta = *reinterpret_cast<const Scalar*>(pbeta); int info = 0; if(SIDE(*side)==INVALID) info = 1; @@ -275,9 +316,9 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa return 1; } + int size = (SIDE(*side)==LEFT) ? (*m) : (*n); #if ISCOMPLEX // FIXME add support for symmetric complex matrix - int size = (SIDE(*side)==LEFT) ? (*m) : (*n); Matrix<Scalar,Dynamic,Dynamic,ColMajor> matA(size,size); if(UPLO(*uplo)==UP) { @@ -294,13 +335,15 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa else if(SIDE(*side)==RIGHT) matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA; #else + internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false); + if(SIDE(*side)==LEFT) - if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); - else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); + else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); else return 0; else if(SIDE(*side)==RIGHT) - if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); - else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); + else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); else return 0; else return 0; @@ -311,35 +354,34 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa // c = alpha*a*a' + beta*c for op = 'N'or'n' // c = alpha*a'*a + beta*c for op = 'T'or't','C'or'c' -int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syrk)(const char *uplo, const char *op, const int *n, const int *k, + const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; #if !ISCOMPLEX - typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&); - static functype func[8]; - - static bool init = false; - if(!init) - { - for(int i=0; i<8; ++i) - func[i] = 0; - - func[NOTR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Upper>::run); - func[TR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Upper>::run); - func[ADJ | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Upper>::run); - - func[NOTR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Lower>::run); - func[TR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Lower>::run); - func[ADJ | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Lower>::run); - - init = true; - } + typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&); + static const functype func[8] = { + // array index: NOTR | (UP << 2) + (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Upper>::run), + // array index: TR | (UP << 2) + (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Upper>::run), + // array index: ADJ | (UP << 2) + (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Upper>::run), + 0, + // array index: NOTR | (LO << 2) + (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Lower>::run), + // array index: TR | (LO << 2) + (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Lower>::run), + // array index: ADJ | (LO << 2) + (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Lower>::run), + 0 + }; #endif - Scalar* a = reinterpret_cast<Scalar*>(pa); + const Scalar* a = reinterpret_cast<const Scalar*>(pa); Scalar* c = reinterpret_cast<Scalar*>(pc); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); - Scalar beta = *reinterpret_cast<Scalar*>(pbeta); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); + Scalar beta = *reinterpret_cast<const Scalar*>(pbeta); int info = 0; if(UPLO(*uplo)==INVALID) info = 1; @@ -381,8 +423,10 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp matrix(c, *n, *n, *ldc).triangularView<Lower>() += alpha * matrix(a,*k,*n,*lda).transpose() * matrix(a,*k,*n,*lda); } #else + internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*n,*n,*k,1,false); + int code = OP(*op) | (UPLO(*uplo) << 2); - func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha); + func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking); #endif return 0; @@ -390,13 +434,14 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp // c = alpha*a*b' + alpha*b*a' + beta*c for op = 'N'or'n' // c = alpha*a'*b + alpha*b'*a + beta*c for op = 'T'or't' -int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syr2k)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* b = reinterpret_cast<Scalar*>(pb); + const Scalar* a = reinterpret_cast<const Scalar*>(pa); + const Scalar* b = reinterpret_cast<const Scalar*>(pb); Scalar* c = reinterpret_cast<Scalar*>(pc); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); - Scalar beta = *reinterpret_cast<Scalar*>(pbeta); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); + Scalar beta = *reinterpret_cast<const Scalar*>(pbeta); // std::cerr << "in syr2k " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << *ldb << " " << beta << " " << *ldc << "\n"; @@ -457,13 +502,14 @@ int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal // c = alpha*a*b + beta*c for side = 'L'or'l' // c = alpha*b*a + beta*c for side = 'R'or'r -int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(hemm)(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* b = reinterpret_cast<Scalar*>(pb); + const Scalar* a = reinterpret_cast<const Scalar*>(pa); + const Scalar* b = reinterpret_cast<const Scalar*>(pb); Scalar* c = reinterpret_cast<Scalar*>(pc); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); - Scalar beta = *reinterpret_cast<Scalar*>(pbeta); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); + Scalar beta = *reinterpret_cast<const Scalar*>(pbeta); // std::cerr << "in hemm " << *side << " " << *uplo << " " << *m << " " << *n << " " << alpha << " " << *lda << " " << beta << " " << *ldc << "\n"; @@ -486,20 +532,23 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa return 1; } + int size = (SIDE(*side)==LEFT) ? (*m) : (*n); + internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false); + if(SIDE(*side)==LEFT) { if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj, ColMajor,false,false, ColMajor> - ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor> - ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); else return 0; } else if(SIDE(*side)==RIGHT) { if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj, ColMajor> - ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);*/ + ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);*/ else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor> - ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); else return 0; } else @@ -512,29 +561,28 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa // c = alpha*a*conj(a') + beta*c for op = 'N'or'n' // c = alpha*conj(a')*a + beta*c for op = 'C'or'c' -int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(herk)(const char *uplo, const char *op, const int *n, const int *k, + const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; - typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&); - static functype func[8]; - - static bool init = false; - if(!init) - { - for(int i=0; i<8; ++i) - func[i] = 0; - - func[NOTR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Upper>::run); - func[ADJ | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Upper>::run); - - func[NOTR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Lower>::run); - func[ADJ | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Lower>::run); - - init = true; - } - - Scalar* a = reinterpret_cast<Scalar*>(pa); + typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&); + static const functype func[8] = { + // array index: NOTR | (UP << 2) + (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Upper>::run), + 0, + // array index: ADJ | (UP << 2) + (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Upper>::run), + 0, + // array index: NOTR | (LO << 2) + (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Lower>::run), + 0, + // array index: ADJ | (LO << 2) + (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Lower>::run), + 0 + }; + + const Scalar* a = reinterpret_cast<const Scalar*>(pa); Scalar* c = reinterpret_cast<Scalar*>(pc); RealScalar alpha = *palpha; RealScalar beta = *pbeta; @@ -571,7 +619,8 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp if(*k>0 && alpha!=RealScalar(0)) { - func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha); + internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*n,*n,*k,1,false); + func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking); matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); } return 0; @@ -579,12 +628,13 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp // c = alpha*a*conj(b') + conj(alpha)*b*conj(a') + beta*c, for op = 'N'or'n' // c = alpha*conj(a')*b + conj(alpha)*conj(b')*a + beta*c, for op = 'C'or'c' -int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(her2k)(const char *uplo, const char *op, const int *n, const int *k, + const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* b = reinterpret_cast<Scalar*>(pb); + const Scalar* a = reinterpret_cast<const Scalar*>(pa); + const Scalar* b = reinterpret_cast<const Scalar*>(pb); Scalar* c = reinterpret_cast<Scalar*>(pc); - Scalar alpha = *reinterpret_cast<Scalar*>(palpha); + Scalar alpha = *reinterpret_cast<const Scalar*>(palpha); RealScalar beta = *pbeta; // std::cerr << "in her2k " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << *ldb << " " << beta << " " << *ldc << "\n"; |