4 files changed, 236 insertions, 12 deletions
diff --git a/blas/BandTriangularSolver.h b/blas/BandTriangularSolver.h
new file mode 100644
index 000000000..420221aea
--- /dev/null
+++ b/blas/BandTriangularSolver.h
@@ -0,0 +1,112 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_BAND_TRIANGULARSOLVER_H
+#define EIGEN_BAND_TRIANGULARSOLVER_H
+
+namespace internal {
+
+ /* \internal
+  * Solve Ax=b with A a band triangular matrix
+  * TODO: extend it to matrices for x abd b */
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, int StorageOrder>
+struct band_solve_triangular_selector;
+
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar>
+struct band_solve_triangular_selector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,RowMajor>
+{
+  typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
+  typedef Map<Matrix<RhsScalar,Dynamic,1> > RhsMap;
+  enum { IsLower = (Mode&Lower) ? 1 : 0 };
+  static void run(Index size, Index k, const LhsScalar* _lhs, Index lhsStride, RhsScalar* _other)
+  {
+    const LhsMap lhs(_lhs,size,k+1,OuterStride<>(lhsStride));
+    RhsMap other(_other,size,1);
+    typename internal::conditional<
+                          ConjLhs,
+                          const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,
+                          const LhsMap&>
+                        ::type cjLhs(lhs);
+                        
+    for(int col=0 ; col<other.cols() ; ++col)
+    {
+      for(int ii=0; ii<size; ++ii)
+      {
+        int i = IsLower ? ii : size-ii-1;
+        int actual_k = (std::min)(k,ii);
+        int actual_start = IsLower ? k-actual_k : 1;
+        
+        if(actual_k>0)
+          other.coeffRef(i,col) -= cjLhs.row(i).segment(actual_start,actual_k).transpose()
+                                  .cwiseProduct(other.col(col).segment(IsLower ? i-actual_k : i+1,actual_k)).sum();
+
+        if((Mode&UnitDiag)==0)
+          other.coeffRef(i,col) /= cjLhs(i,IsLower ? k : 0);
+      }
+    }
+  }
+  
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar>
+struct band_solve_triangular_selector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ColMajor>
+{
+  typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
+  typedef Map<Matrix<RhsScalar,Dynamic,1> > RhsMap;
+  enum { IsLower = (Mode&Lower) ? 1 : 0 };
+  static void run(Index size, Index k, const LhsScalar* _lhs, Index lhsStride, RhsScalar* _other)
+  {
+    const LhsMap lhs(_lhs,k+1,size,OuterStride<>(lhsStride));
+    RhsMap other(_other,size,1);
+    typename internal::conditional<
+                          ConjLhs,
+                          const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,
+                          const LhsMap&>
+                        ::type cjLhs(lhs);
+                        
+    for(int col=0 ; col<other.cols() ; ++col)
+    {
+      for(int ii=0; ii<size; ++ii)
+      {
+        int i = IsLower ? ii : size-ii-1;
+        int actual_k = (std::min)(k,size-ii-1);
+        int actual_start = IsLower ? 1 : k-actual_k;
+        
+        if((Mode&UnitDiag)==0)
+          other.coeffRef(i,col) /= cjLhs(IsLower ? 0 : k, i);
+
+        if(actual_k>0)
+          other.col(col).segment(IsLower ? i+1 : i-actual_k, actual_k)
+              -= other.coeff(i,col) * cjLhs.col(i).segment(actual_start,actual_k);
+        
+      }
+    }
+  }
+};
+
+
+} // end namespace internal
+
+#endif // EIGEN_BAND_TRIANGULARSOLVER_H
diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt
index 55440f4dc..453d5874c 100644
--- a/blas/CMakeLists.txt
+++ b/blas/CMakeLists.txt
@@ -18,10 +18,11 @@ if(EIGEN_Fortran_COMPILER_WORKS)
 set(EigenBlas_SRCS ${EigenBlas_SRCS}
     complexdots.f
     srotm.f srotmg.f drotm.f drotmg.f
-    lsame.f   chpr2.f  ctbsv.f  dspmv.f  dtbmv.f  dtpsv.f ssbmv.f  sspr.f   stpmv.f
-    zhpr2.f  ztbsv.f chbmv.f  chpr.f   ctpmv.f     dspr2.f dtbsv.f  sspmv.f  stbmv.f  stpsv.f
-    zhbmv.f  zhpr.f   ztpmv.f chpmv.f  ctbmv.f  ctpsv.f    dsbmv.f  dspr.f   dtpmv.f   sspr2.f
-    stbsv.f  zhpmv.f  ztbmv.f  ztpsv.f
+    lsame.f   chpr2.f  dspmv.f    dtpsv.f ssbmv.f  sspr.f   stpmv.f
+    zhpr2.f  chbmv.f  chpr.f   ctpmv.f     dspr2.f  sspmv.f    stpsv.f
+    zhbmv.f  zhpr.f   ztpmv.f chpmv.f   ctpsv.f    dsbmv.f  dspr.f   dtpmv.f   sspr2.f
+    zhpmv.f    ztpsv.f
+    dtbmv.f stbmv.f ctbmv.f ztbmv.f
 )
 else()
 
diff --git a/blas/common.h b/blas/common.h
index a63a5d286..714bce574 100644
--- a/blas/common.h
+++ b/blas/common.h
@@ -93,6 +93,12 @@ inline bool check_uplo(const char* uplo)
 
 #include <Eigen/Core>
 #include <Eigen/Jacobi>
+
+
+namespace Eigen {
+#include "BandTriangularSolver.h"
+}
+
 using namespace Eigen;
 
 typedef SCALAR Scalar;
diff --git a/blas/level2_impl.h b/blas/level2_impl.h
index 8cbc2f424..0781fa56a 100644
--- a/blas/level2_impl.h
+++ b/blas/level2_impl.h
@@ -271,6 +271,7 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca
   return 0;
 }
 
+#if 0
 /**  TBMV  performs one of the matrix-vector operations
   *
   *     x := A*x,   or   x := A'*x,
@@ -278,10 +279,56 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca
   *  where x is an n element vector and  A is an n by n unit, or non-unit,
   *  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
   */
-// int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx)
-// {
-//   return 1;
-// }
+int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx)
+{
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  int coeff_rows = *k + 1;
+  
+  int info = 0;
+       if(UPLO(*uplo)==INVALID)                                       info = 1;
+  else if(OP(*opa)==INVALID)                                          info = 2;
+  else if(DIAG(*diag)==INVALID)                                       info = 3;
+  else if(*n<0)                                                       info = 4;
+  else if(*k<0)                                                       info = 5;
+  else if(*lda<coeff_rows)                                            info = 7;
+  else if(*incx==0)                                                   info = 9;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"TBMV ",&info,6);
+  
+  if(*n==0)
+    return 0;
+  
+  int actual_n = *n;
+  
+  Scalar* actual_x = get_compact_vector(x,actual_n,*incx);
+  
+  MatrixType mat_coeffs(a,coeff_rows,*n,*lda);
+  
+  int ku = UPLO(*uplo)==UPPER ? *k : 0;
+  int kl = UPLO(*uplo)==LOWER ? *k : 0;
+  
+  for(int j=0; j<*n; ++j)
+  {
+    int start = std::max(0,j - ku);
+    int end = std::min((*m)-1,j + kl);
+    int len = end - start + 1;
+    int offset = (ku) - j + start;
+    
+    if(OP(*trans)==NOTR)
+      vector(actual_y+start,len) += (alpha*actual_x[j]) * mat_coeffs.col(j).segment(offset,len);
+    else if(OP(*trans)==TR)
+      actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).transpose() * vector(actual_x+start,len) ).value();
+    else
+      actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).adjoint()   * vector(actual_x+start,len) ).value();
+  }    
+  
+  if(actual_x!=x) delete[] actual_x;
+  if(actual_y!=y) delete[] copy_back(actual_y,y,actual_m,*incy);
+  
+  return 0;
+}
+#endif
 
 /**  DTBSV  solves one of the systems of equations
   *
@@ -294,10 +341,68 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca
   *  No test for singularity or near-singularity is included in this
   *  routine. Such tests must be performed before calling this routine.
   */
-// int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *trans, char *diag, int *n, int *k, RealScalar *a, int *lda, RealScalar *x, int *incx)
-// {
-//   return 1;
-// }
+int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx)
+{
+  typedef void (*functype)(int, int, const Scalar *, int, Scalar *);
+  static functype func[16];
+
+  static bool init = false;
+  if(!init)
+  {
+    for(int k=0; k<16; ++k)
+      func[k] = 0;
+
+    func[NOTR  | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0,       Scalar,false,Scalar,ColMajor>::run);
+    func[TR    | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0,       Scalar,false,Scalar,RowMajor>::run);
+    func[ADJ   | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0,       Scalar,Conj, Scalar,RowMajor>::run);
+
+    func[NOTR  | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0,       Scalar,false,Scalar,ColMajor>::run);
+    func[TR    | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0,       Scalar,false,Scalar,RowMajor>::run);
+    func[ADJ   | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0,       Scalar,Conj, Scalar,RowMajor>::run);
+
+    func[NOTR  | (UP << 2) | (UNIT  << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,ColMajor>::run);
+    func[TR    | (UP << 2) | (UNIT  << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,RowMajor>::run);
+    func[ADJ   | (UP << 2) | (UNIT  << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run);
+
+    func[NOTR  | (LO << 2) | (UNIT  << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,ColMajor>::run);
+    func[TR    | (LO << 2) | (UNIT  << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,RowMajor>::run);
+    func[ADJ   | (LO << 2) | (UNIT  << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run);
+
+    init = true;
+  }
+
+  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  int coeff_rows = *k+1;
+  
+  int info = 0;
+       if(UPLO(*uplo)==INVALID)                                       info = 1;
+  else if(OP(*op)==INVALID)                                           info = 2;
+  else if(DIAG(*diag)==INVALID)                                       info = 3;
+  else if(*n<0)                                                       info = 4;
+  else if(*k<0)                                                       info = 5;
+  else if(*lda<coeff_rows)                                            info = 7;
+  else if(*incx==0)                                                   info = 9;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"TBSV ",&info,6);
+  
+  if(*n==0 || (*k==0 && DIAG(*diag)==UNIT))
+    return 0;
+  
+  int actual_n = *n;
+ 
+  Scalar* actual_x = get_compact_vector(x,actual_n,*incx);
+  
+  int code = OP(*op) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
+  if(code>=16 || func[code]==0)
+    return 0;
+
+  func[code](*n, *k, a, *lda, actual_x);
+  
+  if(actual_x!=x) delete[] copy_back(actual_x,x,actual_n,*incx);
+  
+  return 0;
+}
 
 /**  DTPMV  performs one of the matrix-vector operations
   *