diff options
-rw-r--r-- | blas/BandTriangularSolver.h | 112 | ||||
-rw-r--r-- | blas/CMakeLists.txt | 9 | ||||
-rw-r--r-- | blas/common.h | 6 | ||||
-rw-r--r-- | blas/level2_impl.h | 121 |
4 files changed, 236 insertions, 12 deletions
diff --git a/blas/BandTriangularSolver.h b/blas/BandTriangularSolver.h new file mode 100644 index 000000000..420221aea --- /dev/null +++ b/blas/BandTriangularSolver.h @@ -0,0 +1,112 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_BAND_TRIANGULARSOLVER_H +#define EIGEN_BAND_TRIANGULARSOLVER_H + +namespace internal { + + /* \internal + * Solve Ax=b with A a band triangular matrix + * TODO: extend it to matrices for x abd b */ +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, int StorageOrder> +struct band_solve_triangular_selector; + + +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar> +struct band_solve_triangular_selector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,RowMajor> +{ + typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap; + typedef Map<Matrix<RhsScalar,Dynamic,1> > RhsMap; + enum { IsLower = (Mode&Lower) ? 1 : 0 }; + static void run(Index size, Index k, const LhsScalar* _lhs, Index lhsStride, RhsScalar* _other) + { + const LhsMap lhs(_lhs,size,k+1,OuterStride<>(lhsStride)); + RhsMap other(_other,size,1); + typename internal::conditional< + ConjLhs, + const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>, + const LhsMap&> + ::type cjLhs(lhs); + + for(int col=0 ; col<other.cols() ; ++col) + { + for(int ii=0; ii<size; ++ii) + { + int i = IsLower ? ii : size-ii-1; + int actual_k = (std::min)(k,ii); + int actual_start = IsLower ? k-actual_k : 1; + + if(actual_k>0) + other.coeffRef(i,col) -= cjLhs.row(i).segment(actual_start,actual_k).transpose() + .cwiseProduct(other.col(col).segment(IsLower ? i-actual_k : i+1,actual_k)).sum(); + + if((Mode&UnitDiag)==0) + other.coeffRef(i,col) /= cjLhs(i,IsLower ? k : 0); + } + } + } + +}; + +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar> +struct band_solve_triangular_selector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ColMajor> +{ + typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap; + typedef Map<Matrix<RhsScalar,Dynamic,1> > RhsMap; + enum { IsLower = (Mode&Lower) ? 1 : 0 }; + static void run(Index size, Index k, const LhsScalar* _lhs, Index lhsStride, RhsScalar* _other) + { + const LhsMap lhs(_lhs,k+1,size,OuterStride<>(lhsStride)); + RhsMap other(_other,size,1); + typename internal::conditional< + ConjLhs, + const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>, + const LhsMap&> + ::type cjLhs(lhs); + + for(int col=0 ; col<other.cols() ; ++col) + { + for(int ii=0; ii<size; ++ii) + { + int i = IsLower ? ii : size-ii-1; + int actual_k = (std::min)(k,size-ii-1); + int actual_start = IsLower ? 1 : k-actual_k; + + if((Mode&UnitDiag)==0) + other.coeffRef(i,col) /= cjLhs(IsLower ? 0 : k, i); + + if(actual_k>0) + other.col(col).segment(IsLower ? i+1 : i-actual_k, actual_k) + -= other.coeff(i,col) * cjLhs.col(i).segment(actual_start,actual_k); + + } + } + } +}; + + +} // end namespace internal + +#endif // EIGEN_BAND_TRIANGULARSOLVER_H diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index 55440f4dc..453d5874c 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -18,10 +18,11 @@ if(EIGEN_Fortran_COMPILER_WORKS) set(EigenBlas_SRCS ${EigenBlas_SRCS} complexdots.f srotm.f srotmg.f drotm.f drotmg.f - lsame.f chpr2.f ctbsv.f dspmv.f dtbmv.f dtpsv.f ssbmv.f sspr.f stpmv.f - zhpr2.f ztbsv.f chbmv.f chpr.f ctpmv.f dspr2.f dtbsv.f sspmv.f stbmv.f stpsv.f - zhbmv.f zhpr.f ztpmv.f chpmv.f ctbmv.f ctpsv.f dsbmv.f dspr.f dtpmv.f sspr2.f - stbsv.f zhpmv.f ztbmv.f ztpsv.f + lsame.f chpr2.f dspmv.f dtpsv.f ssbmv.f sspr.f stpmv.f + zhpr2.f chbmv.f chpr.f ctpmv.f dspr2.f sspmv.f stpsv.f + zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dspr.f dtpmv.f sspr2.f + zhpmv.f ztpsv.f + dtbmv.f stbmv.f ctbmv.f ztbmv.f ) else() diff --git a/blas/common.h b/blas/common.h index a63a5d286..714bce574 100644 --- a/blas/common.h +++ b/blas/common.h @@ -93,6 +93,12 @@ inline bool check_uplo(const char* uplo) #include <Eigen/Core> #include <Eigen/Jacobi> + + +namespace Eigen { +#include "BandTriangularSolver.h" +} + using namespace Eigen; typedef SCALAR Scalar; diff --git a/blas/level2_impl.h b/blas/level2_impl.h index 8cbc2f424..0781fa56a 100644 --- a/blas/level2_impl.h +++ b/blas/level2_impl.h @@ -271,6 +271,7 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca return 0; } +#if 0 /** TBMV performs one of the matrix-vector operations * * x := A*x, or x := A'*x, @@ -278,10 +279,56 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca * where x is an n element vector and A is an n by n unit, or non-unit, * upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ -// int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx) -// { -// return 1; -// } +int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) +{ + Scalar* a = reinterpret_cast<Scalar*>(pa); + Scalar* x = reinterpret_cast<Scalar*>(px); + int coeff_rows = *k + 1; + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(OP(*opa)==INVALID) info = 2; + else if(DIAG(*diag)==INVALID) info = 3; + else if(*n<0) info = 4; + else if(*k<0) info = 5; + else if(*lda<coeff_rows) info = 7; + else if(*incx==0) info = 9; + if(info) + return xerbla_(SCALAR_SUFFIX_UP"TBMV ",&info,6); + + if(*n==0) + return 0; + + int actual_n = *n; + + Scalar* actual_x = get_compact_vector(x,actual_n,*incx); + + MatrixType mat_coeffs(a,coeff_rows,*n,*lda); + + int ku = UPLO(*uplo)==UPPER ? *k : 0; + int kl = UPLO(*uplo)==LOWER ? *k : 0; + + for(int j=0; j<*n; ++j) + { + int start = std::max(0,j - ku); + int end = std::min((*m)-1,j + kl); + int len = end - start + 1; + int offset = (ku) - j + start; + + if(OP(*trans)==NOTR) + vector(actual_y+start,len) += (alpha*actual_x[j]) * mat_coeffs.col(j).segment(offset,len); + else if(OP(*trans)==TR) + actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).transpose() * vector(actual_x+start,len) ).value(); + else + actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).adjoint() * vector(actual_x+start,len) ).value(); + } + + if(actual_x!=x) delete[] actual_x; + if(actual_y!=y) delete[] copy_back(actual_y,y,actual_m,*incy); + + return 0; +} +#endif /** DTBSV solves one of the systems of equations * @@ -294,10 +341,68 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. */ -// int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx) -// { -// return 1; -// } +int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) +{ + typedef void (*functype)(int, int, const Scalar *, int, Scalar *); + static functype func[16]; + + static bool init = false; + if(!init) + { + for(int k=0; k<16; ++k) + func[k] = 0; + + func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,ColMajor>::run); + func[TR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,RowMajor>::run); + func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,Conj, Scalar,RowMajor>::run); + + func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,ColMajor>::run); + func[TR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,RowMajor>::run); + func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,Conj, Scalar,RowMajor>::run); + + func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,ColMajor>::run); + func[TR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,RowMajor>::run); + func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run); + + func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,ColMajor>::run); + func[TR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,RowMajor>::run); + func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run); + + init = true; + } + + Scalar* a = reinterpret_cast<Scalar*>(pa); + Scalar* x = reinterpret_cast<Scalar*>(px); + int coeff_rows = *k+1; + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(OP(*op)==INVALID) info = 2; + else if(DIAG(*diag)==INVALID) info = 3; + else if(*n<0) info = 4; + else if(*k<0) info = 5; + else if(*lda<coeff_rows) info = 7; + else if(*incx==0) info = 9; + if(info) + return xerbla_(SCALAR_SUFFIX_UP"TBSV ",&info,6); + + if(*n==0 || (*k==0 && DIAG(*diag)==UNIT)) + return 0; + + int actual_n = *n; + + Scalar* actual_x = get_compact_vector(x,actual_n,*incx); + + int code = OP(*op) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3); + if(code>=16 || func[code]==0) + return 0; + + func[code](*n, *k, a, *lda, actual_x); + + if(actual_x!=x) delete[] copy_back(actual_x,x,actual_n,*incx); + + return 0; +} /** DTPMV performs one of the matrix-vector operations * |