From 2f9e6314b183d333ad75ec578815073ed9fb390e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 25 Jan 2016 21:56:05 +0100 Subject: update BLAS interface to general_matrix_matrix_triangular_product --- blas/level3_impl.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'blas') diff --git a/blas/level3_impl.h b/blas/level3_impl.h index 6a6b00728..835e53680 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h @@ -315,7 +315,7 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp { // std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; #if !ISCOMPLEX - typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&); + typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking&); static functype func[8]; static bool init = false; @@ -381,8 +381,10 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp matrix(c, *n, *n, *ldc).triangularView() += alpha * matrix(a,*k,*n,*lda).transpose() * matrix(a,*k,*n,*lda); } #else + internal::gemm_blocking_space blocking(*n,*n,*k,1,false); + int code = OP(*op) | (UPLO(*uplo) << 2); - func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha); + func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking); #endif return 0; @@ -516,7 +518,7 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp { // std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; - typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&); + typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking&); static functype func[8]; static bool init = false; @@ -571,7 +573,8 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp if(*k>0 && alpha!=RealScalar(0)) { - func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha); + internal::gemm_blocking_space blocking(*n,*n,*k,1,false); + func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking); matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); } return 0; -- cgit v1.2.3 From 8328caa618731fb2a5802daaf8088db4175567a2 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 25 Jan 2016 22:06:42 +0100 Subject: bug #51: add block preallocation mechanism to selfadjoit*matrix product. --- Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 46 ++++++++++++----------- blas/level3_impl.h | 23 +++++++----- test/nomalloc.cpp | 2 +- 3 files changed, 39 insertions(+), 32 deletions(-) (limited to 'blas') diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index f84f54982..ba8ee1d53 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -291,7 +291,7 @@ struct product_selfadjoint_matrix& blocking) { product_selfadjoint_matrix::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), ColMajor> - ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); + ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); } }; @@ -314,7 +314,7 @@ struct product_selfadjoint_matrix& blocking); }; template & blocking) { Index size = rows; @@ -340,17 +340,16 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix(kc, mc, nc, 1); - // kc must smaller than mc + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + // kc must be smaller than mc kc = (std::min)(kc,mc); + std::size_t sizeA = kc*mc; std::size_t sizeB = kc*cols; - ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); - Scalar* blockB = allocatedBlockB; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); gebp_kernel gebp_kernel; symm_pack_lhs pack_lhs; @@ -410,7 +409,7 @@ struct product_selfadjoint_matrix& blocking); }; template & blocking) { Index size = cols; @@ -432,14 +431,12 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix(kc, mc, nc, 1); + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + std::size_t sizeA = kc*mc; std::size_t sizeB = kc*cols; - ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); - Scalar* blockB = allocatedBlockB; + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); gebp_kernel gebp_kernel; gemm_pack_lhs pack_lhs; @@ -498,6 +495,11 @@ struct selfadjoint_product_impl Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) * RhsBlasTraits::extractScalarFactor(a_rhs); + typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, + Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType; + + BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false); + internal::product_selfadjoint_matrix::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), @@ -509,7 +511,7 @@ struct selfadjoint_product_impl &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info &dst.coeffRef(0,0), dst.outerStride(), // result info - actualAlpha // alpha + actualAlpha, blocking // alpha ); } }; diff --git a/blas/level3_impl.h b/blas/level3_impl.h index 835e53680..b2772b190 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h @@ -275,9 +275,9 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa return 1; } + int size = (SIDE(*side)==LEFT) ? (*m) : (*n); #if ISCOMPLEX // FIXME add support for symmetric complex matrix - int size = (SIDE(*side)==LEFT) ? (*m) : (*n); Matrix matA(size,size); if(UPLO(*uplo)==UP) { @@ -294,13 +294,15 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa else if(SIDE(*side)==RIGHT) matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA; #else + internal::gemm_blocking_space blocking(*m,*n,size,1,false); + if(SIDE(*side)==LEFT) - if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); - else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); + else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); else return 0; else if(SIDE(*side)==RIGHT) - if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); - else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); + else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); else return 0; else return 0; @@ -488,20 +490,23 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa return 1; } + int size = (SIDE(*side)==LEFT) ? (*m) : (*n); + internal::gemm_blocking_space blocking(*m,*n,size,1,false); + if(SIDE(*side)==LEFT) { if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix - ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix - ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); else return 0; } else if(SIDE(*side)==RIGHT) { if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView();/*internal::product_selfadjoint_matrix - ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);*/ + ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);*/ else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix - ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); else return 0; } else diff --git a/test/nomalloc.cpp b/test/nomalloc.cpp index d85e9e5bc..077ecae55 100644 --- a/test/nomalloc.cpp +++ b/test/nomalloc.cpp @@ -85,7 +85,7 @@ template void nomalloc(const MatrixType& m) m2.template selfadjointView().rankUpdate(m1); m2 += m2.template triangularView() * m1; m2.template triangularView() = m2 * m2; -// m1 += m1.template selfadjointView() * m2; + m1 += m1.template selfadjointView() * m2; VERIFY_IS_APPROX(m2,m2); } -- cgit v1.2.3 From 639b1d864a3d8e3cd5f2f060af9e1a5fdae27008 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 26 Jan 2016 11:44:16 -0500 Subject: bug #1152: Fix data race in static initialization of blas --- blas/level2_cplx_impl.h | 95 +++++---------- blas/level2_impl.h | 303 ++++++++++++++++++++++++----------------------- blas/level2_real_impl.h | 123 +++++-------------- blas/level3_impl.h | 305 +++++++++++++++++++++++++++--------------------- 4 files changed, 388 insertions(+), 438 deletions(-) (limited to 'blas') diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 9b845de22..2edc51596 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -19,19 +19,12 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_matrix_vector_product::run); - func[LO] = (internal::selfadjoint_matrix_vector_product::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_matrix_vector_product::run), + // array index: LO + (internal::selfadjoint_matrix_vector_product::run), + }; Scalar* a = reinterpret_cast(pa); Scalar* x = reinterpret_cast(px); @@ -111,19 +104,12 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, RealScalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_packed_rank1_update::run); - func[LO] = (internal::selfadjoint_packed_rank1_update::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_packed_rank1_update::run), + // array index: LO + (internal::selfadjoint_packed_rank1_update::run), + }; Scalar* x = reinterpret_cast(px); Scalar* ap = reinterpret_cast(pap); @@ -162,19 +148,12 @@ int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::packed_rank2_update_selector::run); - func[LO] = (internal::packed_rank2_update_selector::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::packed_rank2_update_selector::run), + // array index: LO + (internal::packed_rank2_update_selector::run), + }; Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); @@ -217,19 +196,12 @@ int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) { typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (selfadjoint_rank1_update::run); - func[LO] = (selfadjoint_rank1_update::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (selfadjoint_rank1_update::run), + // array index: LO + (selfadjoint_rank1_update::run), + }; Scalar* x = reinterpret_cast(px); Scalar* a = reinterpret_cast(pa); @@ -271,19 +243,12 @@ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) { typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::rank2_update_selector::run); - func[LO] = (internal::rank2_update_selector::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::rank2_update_selector::run), + // array index: LO + (internal::rank2_update_selector::run), + }; Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); diff --git a/blas/level2_impl.h b/blas/level2_impl.h index 917f2e372..d09db0cc6 100644 --- a/blas/level2_impl.h +++ b/blas/level2_impl.h @@ -26,20 +26,15 @@ struct general_matrix_vector_product_wrapper int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *incb, RealScalar *pbeta, RealScalar *pc, int *incc) { typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int , Scalar *, int, Scalar); - static functype func[4]; - - static bool init = false; - if(!init) - { - for(int k=0; k<4; ++k) - func[k] = 0; - - func[NOTR] = (general_matrix_vector_product_wrapper::run); - func[TR ] = (general_matrix_vector_product_wrapper::run); - func[ADJ ] = (general_matrix_vector_product_wrapper::run); - - init = true; - } + static const functype func[4] = { + // array index: NOTR + (general_matrix_vector_product_wrapper::run), + // array index: TR + (general_matrix_vector_product_wrapper::run), + // array index: ADJ + (general_matrix_vector_product_wrapper::run), + 0 + }; Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); @@ -90,32 +85,36 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) { typedef void (*functype)(int, const Scalar *, int, Scalar *); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + 0 + }; Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); @@ -145,32 +144,36 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) { typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar&); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + 0 + }; Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); @@ -346,32 +349,36 @@ int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, Rea int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) { typedef void (*functype)(int, int, const Scalar *, int, Scalar *); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int i=0; i<16; ++i) - func[i] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + 0, + }; Scalar* a = reinterpret_cast(pa); Scalar* x = reinterpret_cast(px); @@ -416,32 +423,36 @@ int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, Real int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) { typedef void (*functype)(int, const Scalar*, const Scalar*, Scalar*, Scalar); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + 0 + }; Scalar* ap = reinterpret_cast(pap); Scalar* x = reinterpret_cast(px); @@ -487,32 +498,36 @@ int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) { typedef void (*functype)(int, const Scalar*, Scalar*); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + 0 + }; Scalar* ap = reinterpret_cast(pap); Scalar* x = reinterpret_cast(px); diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index cac89b268..4896a03d9 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -13,19 +13,12 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_matrix_vector_product::run); - func[LO] = (internal::selfadjoint_matrix_vector_product::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_matrix_vector_product::run), + // array index: LO + (internal::selfadjoint_matrix_vector_product::run), + }; Scalar* a = reinterpret_cast(pa); Scalar* x = reinterpret_cast(px); @@ -71,34 +64,13 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc) { -// typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar); -// static functype func[2]; - -// static bool init = false; -// if(!init) -// { -// for(int k=0; k<2; ++k) -// func[k] = 0; -// -// func[UP] = (internal::selfadjoint_product::run); -// func[LO] = (internal::selfadjoint_product::run); - -// init = true; -// } typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (selfadjoint_rank1_update::run); - func[LO] = (selfadjoint_rank1_update::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (selfadjoint_rank1_update::run), + // array index: LO + (selfadjoint_rank1_update::run), + }; Scalar* x = reinterpret_cast(px); Scalar* c = reinterpret_cast(pc); @@ -131,34 +103,13 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, // C := alpha*x*y' + alpha*y*x' + C int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc) { -// typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); -// static functype func[2]; -// -// static bool init = false; -// if(!init) -// { -// for(int k=0; k<2; ++k) -// func[k] = 0; -// -// func[UP] = (internal::selfadjoint_product::run); -// func[LO] = (internal::selfadjoint_product::run); -// -// init = true; -// } typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::rank2_update_selector::run); - func[LO] = (internal::rank2_update_selector::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::rank2_update_selector::run), + // array index: LO + (internal::rank2_update_selector::run), + }; Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); @@ -234,19 +185,12 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_packed_rank1_update::run); - func[LO] = (internal::selfadjoint_packed_rank1_update::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_packed_rank1_update::run), + // array index: LO + (internal::selfadjoint_packed_rank1_update::run), + }; Scalar* x = reinterpret_cast(px); Scalar* ap = reinterpret_cast(pap); @@ -285,19 +229,12 @@ int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *in int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::packed_rank2_update_selector::run); - func[LO] = (internal::packed_rank2_update_selector::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::packed_rank2_update_selector::run), + // array index: LO + (internal::packed_rank2_update_selector::run), + }; Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); diff --git a/blas/level3_impl.h b/blas/level3_impl.h index b2772b190..267a727ef 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h @@ -13,24 +13,29 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal { // std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking&, Eigen::internal::GemmParallelInfo*); - static functype func[12]; - - static bool init = false; - if(!init) - { - for(int i=0; i<12; ++i) - func[i] = 0; - func[NOTR | (NOTR << 2)] = (internal::general_matrix_matrix_product::run); - func[TR | (NOTR << 2)] = (internal::general_matrix_matrix_product::run); - func[ADJ | (NOTR << 2)] = (internal::general_matrix_matrix_product::run); - func[NOTR | (TR << 2)] = (internal::general_matrix_matrix_product::run); - func[TR | (TR << 2)] = (internal::general_matrix_matrix_product::run); - func[ADJ | (TR << 2)] = (internal::general_matrix_matrix_product::run); - func[NOTR | (ADJ << 2)] = (internal::general_matrix_matrix_product::run); - func[TR | (ADJ << 2)] = (internal::general_matrix_matrix_product::run); - func[ADJ | (ADJ << 2)] = (internal::general_matrix_matrix_product::run); - init = true; - } + static const functype func[12] = { + // array index: NOTR | (NOTR << 2) + (internal::general_matrix_matrix_product::run), + // array index: TR | (NOTR << 2) + (internal::general_matrix_matrix_product::run), + // array index: ADJ | (NOTR << 2) + (internal::general_matrix_matrix_product::run), + 0, + // array index: NOTR | (TR << 2) + (internal::general_matrix_matrix_product::run), + // array index: TR | (TR << 2) + (internal::general_matrix_matrix_product::run), + // array index: ADJ | (TR << 2) + (internal::general_matrix_matrix_product::run), + 0, + // array index: NOTR | (ADJ << 2) + (internal::general_matrix_matrix_product::run), + // array index: TR | (ADJ << 2) + (internal::general_matrix_matrix_product::run), + // array index: ADJ | (ADJ << 2) + (internal::general_matrix_matrix_product::run), + 0 + }; Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); @@ -73,49 +78,64 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, { // std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n"; typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking&); - static functype func[32]; - - static bool init = false; - if(!init) - { - for(int i=0; i<32; ++i) - func[i] = 0; - - func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - - - func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - - init = true; - } + static const functype func[32] = { + // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run),\ + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + 0 + }; Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); @@ -162,47 +182,64 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, { // std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking&); - static functype func[32]; - static bool init = false; - if(!init) - { - for(int k=0; k<32; ++k) - func[k] = 0; - - func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - init = true; - } + static const functype func[32] = { + // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0 + }; Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); @@ -318,24 +355,22 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp // std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; #if !ISCOMPLEX typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking&); - static functype func[8]; - - static bool init = false; - if(!init) - { - for(int i=0; i<8; ++i) - func[i] = 0; - - func[NOTR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[TR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[ADJ | (UP << 2)] = (internal::general_matrix_matrix_triangular_product::run); - - func[NOTR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[TR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[ADJ | (LO << 2)] = (internal::general_matrix_matrix_triangular_product::run); - - init = true; - } + static const functype func[8] = { + // array index: NOTR | (UP << 2) + (internal::general_matrix_matrix_triangular_product::run), + // array index: TR | (UP << 2) + (internal::general_matrix_matrix_triangular_product::run), + // array index: ADJ | (UP << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0, + // array index: NOTR | (LO << 2) + (internal::general_matrix_matrix_triangular_product::run), + // array index: TR | (LO << 2) + (internal::general_matrix_matrix_triangular_product::run), + // array index: ADJ | (LO << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0 + }; #endif Scalar* a = reinterpret_cast(pa); @@ -524,22 +559,20 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp // std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking&); - static functype func[8]; - - static bool init = false; - if(!init) - { - for(int i=0; i<8; ++i) - func[i] = 0; - - func[NOTR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[ADJ | (UP << 2)] = (internal::general_matrix_matrix_triangular_product::run); - - func[NOTR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[ADJ | (LO << 2)] = (internal::general_matrix_matrix_triangular_product::run); - - init = true; - } + static const functype func[8] = { + // array index: NOTR | (UP << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0, + // array index: ADJ | (UP << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0, + // array index: NOTR | (LO << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0, + // array index: ADJ | (LO << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0 + }; Scalar* a = reinterpret_cast(pa); Scalar* c = reinterpret_cast(pc); -- cgit v1.2.3 From 4e8e5888d7a78d514e54a518f6692f2838314328 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 11 Apr 2016 15:12:44 +0200 Subject: Improve constness of blas level-3 interface. --- Eigen/src/misc/blas.h | 266 ++++++++++++++++++-------------------------------- blas/common.h | 19 ++++ blas/level3_impl.h | 81 ++++++++------- 3 files changed, 158 insertions(+), 208 deletions(-) (limited to 'blas') diff --git a/Eigen/src/misc/blas.h b/Eigen/src/misc/blas.h index 6fce99ed5..ae0c393f1 100644 --- a/Eigen/src/misc/blas.h +++ b/Eigen/src/misc/blas.h @@ -30,15 +30,15 @@ int BLASFUNC(cdotcw) (int *, float *, int *, float *, int *, float*); int BLASFUNC(zdotuw) (int *, double *, int *, double *, int *, double*); int BLASFUNC(zdotcw) (int *, double *, int *, double *, int *, double*); -int BLASFUNC(saxpy) (int *, float *, float *, int *, float *, int *); -int BLASFUNC(daxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(qaxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(caxpy) (int *, float *, float *, int *, float *, int *); -int BLASFUNC(zaxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(xaxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(caxpyc)(int *, float *, float *, int *, float *, int *); -int BLASFUNC(zaxpyc)(int *, double *, double *, int *, double *, int *); -int BLASFUNC(xaxpyc)(int *, double *, double *, int *, double *, int *); +int BLASFUNC(saxpy) (const int *, const float *, const float *, const int *, float *, int *); +int BLASFUNC(daxpy) (const int *, const double *, const double *, const int *, double *, int *); +int BLASFUNC(qaxpy) (const int *, const double *, const double *, const int *, double *, int *); +int BLASFUNC(caxpy) (const int *, const float *, const float *, const int *, float *, int *); +int BLASFUNC(zaxpy) (const int *, const double *, const double *, const int *, double *, int *); +int BLASFUNC(xaxpy) (const int *, const double *, const double *, const int *, double *, int *); +int BLASFUNC(caxpyc)(const int *, const float *, const float *, const int *, float *, int *); +int BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, int *); +int BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, int *); int BLASFUNC(scopy) (int *, float *, int *, float *, int *); int BLASFUNC(dcopy) (int *, double *, int *, double *, int *); @@ -177,31 +177,19 @@ int BLASFUNC(xgeru)(int *, int *, double *, double *, int *, int BLASFUNC(xgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *); -int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); +int BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); -int BLASFUNC(strsv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(ctrsv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); +int BLASFUNC(strsv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *); +int BLASFUNC(dtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(qtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(ctrsv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *); +int BLASFUNC(ztrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(xtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); int BLASFUNC(stpsv) (char *, char *, char *, int *, float *, float *, int *); int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *); @@ -210,18 +198,12 @@ int BLASFUNC(ctpsv) (char *, char *, char *, int *, float *, float *, int *); int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *); int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(strmv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(ctrmv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); +int BLASFUNC(strmv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *); +int BLASFUNC(dtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(qtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(ctrmv) (const char *, const char *, const char *, const int *, const float *, const int *, float *, const int *); +int BLASFUNC(ztrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); +int BLASFUNC(xtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *); int BLASFUNC(stpmv) (char *, char *, char *, int *, float *, float *, int *); int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *); @@ -244,18 +226,12 @@ int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float *, int *, floa int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(ssymv) (char *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(csymv) (char *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); +int BLASFUNC(ssymv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(qsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(csymv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); int BLASFUNC(sspmv) (char *, int *, float *, float *, float *, int *, float *, float *, int *); @@ -347,12 +323,9 @@ int BLASFUNC(zhpr2) (char *, int *, double *, int BLASFUNC(xhpr2) (char *, int *, double *, double *, int *, double *, int *, double *); -int BLASFUNC(chemv) (char *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zhemv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xhemv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); +int BLASFUNC(chemv) (const char *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); int BLASFUNC(chpmv) (char *, int *, float *, float *, float *, int *, float *, float *, int *); @@ -401,18 +374,12 @@ int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *, /* Level 3 routines */ -int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *, - float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *, - float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); @@ -434,84 +401,48 @@ int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); - -int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); - -int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *, - float *, float *, int *); -int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *, - float *, float *, int *); -int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); - -int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); - -int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); +int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); + +int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *); +int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); +int BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *); + +int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); + +int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); +int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); + +int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); + +int BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); +int BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); +int BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); +int BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); + +int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); @@ -520,25 +451,16 @@ int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *, int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *, - float *, float *, int *); -int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); - -int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); +int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *); + +int BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +int BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); +int BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); +int BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *); int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, float *, int *); diff --git a/blas/common.h b/blas/common.h index 5ecb153e2..acb50af1b 100644 --- a/blas/common.h +++ b/blas/common.h @@ -104,18 +104,37 @@ matrix(T* data, int rows, int cols, int stride) return Map, 0, OuterStride<> >(data, rows, cols, OuterStride<>(stride)); } +template +Map, 0, OuterStride<> > +matrix(const T* data, int rows, int cols, int stride) +{ + return Map, 0, OuterStride<> >(data, rows, cols, OuterStride<>(stride)); +} + template Map, 0, InnerStride > make_vector(T* data, int size, int incr) { return Map, 0, InnerStride >(data, size, InnerStride(incr)); } +template +Map, 0, InnerStride > make_vector(const T* data, int size, int incr) +{ + return Map, 0, InnerStride >(data, size, InnerStride(incr)); +} + template Map > make_vector(T* data, int size) { return Map >(data, size); } +template +Map > make_vector(const T* data, int size) +{ + return Map >(data, size); +} + template T* get_compact_vector(T* x, int n, int incx) { diff --git a/blas/level3_impl.h b/blas/level3_impl.h index 267a727ef..beb36c47d 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h @@ -9,7 +9,8 @@ #include #include "common.h" -int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(gemm)(const char *opa, const char *opb, const int *m, const int *n, const int *k, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking&, Eigen::internal::GemmParallelInfo*); @@ -37,11 +38,11 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal 0 }; - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); int info = 0; if(OP(*opa)==INVALID) info = 1; @@ -74,7 +75,8 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal return 0; } -int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb) +int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, const int *n, + const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) { // std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n"; typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking&); @@ -137,9 +139,9 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, 0 }; - Scalar* a = reinterpret_cast(pa); + const Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); int info = 0; if(SIDE(*side)==INVALID) info = 1; @@ -178,7 +180,8 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, // b = alpha*op(a)*b for side = 'L'or'l' // b = alpha*b*op(a) for side = 'R'or'r' -int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb) +int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, const int *n, + const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) { // std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking&); @@ -241,9 +244,9 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, 0 }; - Scalar* a = reinterpret_cast(pa); + const Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); int info = 0; if(SIDE(*side)==INVALID) info = 1; @@ -281,14 +284,15 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, // c = alpha*a*b + beta*c for side = 'L'or'l' // c = alpha*b*a + beta*c for side = 'R'or'r -int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(symm)(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in symm " << *side << " " << *uplo << " " << *m << "x" << *n << " lda:" << *lda << " ldb:" << *ldb << " ldc:" << *ldc << " alpha:" << *palpha << " beta:" << *pbeta << "\n"; - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); int info = 0; if(SIDE(*side)==INVALID) info = 1; @@ -350,7 +354,8 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa // c = alpha*a*a' + beta*c for op = 'N'or'n' // c = alpha*a'*a + beta*c for op = 'T'or't','C'or'c' -int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syrk)(const char *uplo, const char *op, const int *n, const int *k, + const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; #if !ISCOMPLEX @@ -373,10 +378,10 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp }; #endif - Scalar* a = reinterpret_cast(pa); + const Scalar* a = reinterpret_cast(pa); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); int info = 0; if(UPLO(*uplo)==INVALID) info = 1; @@ -429,13 +434,14 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp // c = alpha*a*b' + alpha*b*a' + beta*c for op = 'N'or'n' // c = alpha*a'*b + alpha*b'*a + beta*c for op = 'T'or't' -int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syr2k)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); // std::cerr << "in syr2k " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << *ldb << " " << beta << " " << *ldc << "\n"; @@ -496,13 +502,14 @@ int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal // c = alpha*a*b + beta*c for side = 'L'or'l' // c = alpha*b*a + beta*c for side = 'R'or'r -int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(hemm)(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); // std::cerr << "in hemm " << *side << " " << *uplo << " " << *m << " " << *n << " " << alpha << " " << *lda << " " << beta << " " << *ldc << "\n"; @@ -554,7 +561,8 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa // c = alpha*a*conj(a') + beta*c for op = 'N'or'n' // c = alpha*conj(a')*a + beta*c for op = 'C'or'c' -int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(herk)(const char *uplo, const char *op, const int *n, const int *k, + const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; @@ -574,7 +582,7 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp 0 }; - Scalar* a = reinterpret_cast(pa); + const Scalar* a = reinterpret_cast(pa); Scalar* c = reinterpret_cast(pc); RealScalar alpha = *palpha; RealScalar beta = *pbeta; @@ -620,12 +628,13 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp // c = alpha*a*conj(b') + conj(alpha)*b*conj(a') + beta*c, for op = 'N'or'n' // c = alpha*conj(a')*b + conj(alpha)*conj(b')*a + beta*c, for op = 'C'or'c' -int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(her2k)(const char *uplo, const char *op, const int *n, const int *k, + const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); RealScalar beta = *pbeta; // std::cerr << "in her2k " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << *ldb << " " << beta << " " << *ldc << "\n"; -- cgit v1.2.3 From 91bf925fc17c50a7898d84e56ce3fbbd93e7d920 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 11 Apr 2016 17:13:01 +0200 Subject: Improve constness of level2 blas API. --- blas/common.h | 10 +++++----- blas/level1_impl.h | 6 +++--- blas/level2_cplx_impl.h | 13 +++++++------ blas/level2_impl.h | 33 +++++++++++++++++---------------- blas/level2_real_impl.h | 33 +++++++++++++++++---------------- lapack/lapack_common.h | 1 + 6 files changed, 50 insertions(+), 46 deletions(-) (limited to 'blas') diff --git a/blas/common.h b/blas/common.h index acb50af1b..61d8344d9 100644 --- a/blas/common.h +++ b/blas/common.h @@ -10,8 +10,8 @@ #ifndef EIGEN_BLAS_COMMON_H #define EIGEN_BLAS_COMMON_H -#include -#include +#include "../Eigen/Core" +#include "../Eigen/Jacobi" #include @@ -19,8 +19,7 @@ #error the token SCALAR must be defined to compile this file #endif -#include - +#include "../Eigen/src/misc/blas.h" #define NOTR 0 #define TR 1 @@ -94,6 +93,7 @@ enum typedef Matrix PlainMatrixType; typedef Map, 0, OuterStride<> > MatrixType; +typedef Map, 0, OuterStride<> > ConstMatrixType; typedef Map, 0, InnerStride > StridedVectorType; typedef Map > CompactVectorType; @@ -141,7 +141,7 @@ T* get_compact_vector(T* x, int n, int incx) if(incx==1) return x; - T* ret = new Scalar[n]; + typename Eigen::internal::remove_const::type* ret = new Scalar[n]; if(incx<0) make_vector(ret,n) = make_vector(x,n,-incx).reverse(); else make_vector(ret,n) = make_vector(x,n, incx); return ret; diff --git a/blas/level1_impl.h b/blas/level1_impl.h index e623bd178..f857bfa20 100644 --- a/blas/level1_impl.h +++ b/blas/level1_impl.h @@ -9,11 +9,11 @@ #include "common.h" -int EIGEN_BLAS_FUNC(axpy)(int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy) +int EIGEN_BLAS_FUNC(axpy)(const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *py, const int *incy) { - Scalar* x = reinterpret_cast(px); + const Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); if(*n<=0) return 0; diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 2edc51596..e3ce61435 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -16,7 +16,8 @@ * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n hermitian matrix. */ -int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) +int EIGEN_BLAS_FUNC(hemv)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, + const RealScalar *px, const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) { typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); static const functype func[2] = { @@ -26,11 +27,11 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa (internal::selfadjoint_matrix_vector_product::run), }; - Scalar* a = reinterpret_cast(pa); - Scalar* x = reinterpret_cast(px); + const Scalar* a = reinterpret_cast(pa); + const Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); // check arguments int info = 0; @@ -45,7 +46,7 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa if(*n==0) return 1; - Scalar* actual_x = get_compact_vector(x,*n,*incx); + const Scalar* actual_x = get_compact_vector(x,*n,*incx); Scalar* actual_y = get_compact_vector(y,*n,*incy); if(beta!=Scalar(1)) diff --git a/blas/level2_impl.h b/blas/level2_impl.h index d09db0cc6..173f40b44 100644 --- a/blas/level2_impl.h +++ b/blas/level2_impl.h @@ -23,7 +23,8 @@ struct general_matrix_vector_product_wrapper } }; -int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *incb, RealScalar *pbeta, RealScalar *pc, int *incc) +int EIGEN_BLAS_FUNC(gemv)(const char *opa, const int *m, const int *n, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *incb, const RealScalar *pbeta, RealScalar *pc, const int *incc) { typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int , Scalar *, int, Scalar); static const functype func[4] = { @@ -36,11 +37,11 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca 0 }; - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); // check arguments int info = 0; @@ -62,7 +63,7 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca if(code!=NOTR) std::swap(actual_m,actual_n); - Scalar* actual_b = get_compact_vector(b,actual_n,*incb); + const Scalar* actual_b = get_compact_vector(b,actual_n,*incb); Scalar* actual_c = get_compact_vector(c,actual_m,*incc); if(beta!=Scalar(1)) @@ -82,7 +83,7 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca return 1; } -int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) +int EIGEN_BLAS_FUNC(trsv)(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda, RealScalar *pb, const int *incb) { typedef void (*functype)(int, const Scalar *, int, Scalar *); static const functype func[16] = { @@ -116,7 +117,7 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar 0 }; - Scalar* a = reinterpret_cast(pa); + const Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); int info = 0; @@ -141,7 +142,7 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar -int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) +int EIGEN_BLAS_FUNC(trmv)(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda, RealScalar *pb, const int *incb) { typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar&); static const functype func[16] = { @@ -175,7 +176,7 @@ int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar 0 }; - Scalar* a = reinterpret_cast(pa); + const Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); int info = 0; @@ -217,11 +218,11 @@ int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { - Scalar* a = reinterpret_cast(pa); - Scalar* x = reinterpret_cast(px); + const Scalar* a = reinterpret_cast(pa); + const Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); int coeff_rows = *kl+*ku+1; int info = 0; @@ -244,7 +245,7 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca if(OP(*trans)!=NOTR) std::swap(actual_m,actual_n); - Scalar* actual_x = get_compact_vector(x,actual_n,*incx); + const Scalar* actual_x = get_compact_vector(x,actual_n,*incx); Scalar* actual_y = get_compact_vector(y,actual_m,*incy); if(beta!=Scalar(1)) @@ -253,7 +254,7 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca else make_vector(actual_y, actual_m) *= beta; } - MatrixType mat_coeffs(a,coeff_rows,*n,*lda); + ConstMatrixType mat_coeffs(a,coeff_rows,*n,*lda); int nb = std::min(*n,(*m)+(*ku)); for(int j=0; j::run), }; - Scalar* a = reinterpret_cast(pa); - Scalar* x = reinterpret_cast(px); + const Scalar* a = reinterpret_cast(pa); + const Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); // check arguments int info = 0; @@ -39,7 +40,7 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p if(*n==0) return 0; - Scalar* actual_x = get_compact_vector(x,*n,*incx); + const Scalar* actual_x = get_compact_vector(x,*n,*incx); Scalar* actual_y = get_compact_vector(y,*n,*incy); if(beta!=Scalar(1)) @@ -61,7 +62,7 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p } // C := alpha*x*x' + C -int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syr)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *pc, const int *ldc) { typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&); @@ -72,9 +73,9 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, (selfadjoint_rank1_update::run), }; - Scalar* x = reinterpret_cast(px); + const Scalar* x = reinterpret_cast(px); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); int info = 0; if(UPLO(*uplo)==INVALID) info = 1; @@ -87,7 +88,7 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, if(*n==0 || alpha==Scalar(0)) return 1; // if the increment is not 1, let's copy it to a temporary vector to enable vectorization - Scalar* x_cpy = get_compact_vector(x,*n,*incx); + const Scalar* x_cpy = get_compact_vector(x,*n,*incx); int code = UPLO(*uplo); if(code>=2 || func[code]==0) @@ -101,7 +102,7 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, } // C := alpha*x*y' + alpha*y*x' + C -int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syr2)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, const RealScalar *py, const int *incy, RealScalar *pc, const int *ldc) { typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); static const functype func[2] = { @@ -111,10 +112,10 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px (internal::rank2_update_selector::run), }; - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); + const Scalar* x = reinterpret_cast(px); + const Scalar* y = reinterpret_cast(py); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); int info = 0; if(UPLO(*uplo)==INVALID) info = 1; @@ -128,8 +129,8 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px if(alpha==Scalar(0)) return 1; - Scalar* x_cpy = get_compact_vector(x,*n,*incx); - Scalar* y_cpy = get_compact_vector(y,*n,*incy); + const Scalar* x_cpy = get_compact_vector(x,*n,*incx); + const Scalar* y_cpy = get_compact_vector(y,*n,*incy); int code = UPLO(*uplo); if(code>=2 || func[code]==0) diff --git a/lapack/lapack_common.h b/lapack/lapack_common.h index a93598784..c872a813e 100644 --- a/lapack/lapack_common.h +++ b/lapack/lapack_common.h @@ -11,6 +11,7 @@ #define EIGEN_LAPACK_COMMON_H #include "../blas/common.h" +#include "../Eigen/src/misc/lapack.h" #define EIGEN_LAPACK_FUNC(FUNC,ARGLIST) \ extern "C" { int EIGEN_BLAS_FUNC(FUNC) ARGLIST; } \ -- cgit v1.2.3