about | summary | refs | log | tree | commit | diff | homepage
path: root/blas
diff options
context:
space:
mode:
author Chen-Pang He <jdh8@ms63.hinet.net> 2012-09-08 17:29:44 +0800
committer Chen-Pang He <jdh8@ms63.hinet.net> 2012-09-08 17:29:44 +0800
commit e4e7585a24a4ef08742b4c198ab6e37e93eececf (patch)
tree 66cdac8952a48b9d6a25e8222b41e0a5b309ba46 /blas
parent b5f9bec8ac2771abbd74e7abd03734d578a88e80 (diff)
Implement rank-2 update for packed matrices.
Diffstat (limited to 'blas')
-rw-r--r-- blas/CMakeLists.txt 8
-rw-r--r-- blas/Rank2Update.h 54
-rw-r--r-- blas/level2_cplx_impl.h 51
-rw-r--r-- blas/level2_real_impl.h 51
4 files changed, 146 insertions, 18 deletions
diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt
index 453d5874c..e46fde4d4 100644
--- a/blas/CMakeLists.txt
+++ b/blas/CMakeLists.txt
@@ -18,10 +18,10 @@ if(EIGEN_Fortran_COMPILER_WORKS)
set(EigenBlas_SRCS ${EigenBlas_SRCS}
complexdots.f
srotm.f srotmg.f drotm.f drotmg.f
- lsame.f chpr2.f dspmv.f dtpsv.f ssbmv.f sspr.f stpmv.f
- zhpr2.f chbmv.f chpr.f ctpmv.f dspr2.f sspmv.f stpsv.f
- zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dspr.f dtpmv.f sspr2.f
- zhpmv.f ztpsv.f
+ lsame.f dspmv.f dtpsv.f ssbmv.f sspr.f stpmv.f
+ chbmv.f chpr.f ctpmv.f sspmv.f stpsv.f
+ zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dspr.f dtpmv.f
+ zhpmv.f ztpsv.f
dtbmv.f stbmv.f ctbmv.f ztbmv.f
)
else()
diff --git a/blas/Rank2Update.h b/blas/Rank2Update.h
index e7a5eeaba..0cf3a1961 100644
--- a/blas/Rank2Update.h
+++ b/blas/Rank2Update.h
@@ -28,9 +28,8 @@ struct rank2_update_selector<Scalar,Index,Upper>
for (Index i=0; i<size; ++i)
{
- Map<PlainVector>(mat+stride*i, i+1) +=
- conj(alpha) * conj(_u[i]) * v.head(i+1)
- + alpha * conj(_v[i]) * u.head(i+1);
+ Map<PlainVector>(mat+stride*i, i+1) += conj(alpha) * conj(_u[i]) * v.head(i+1)
+ + alpha * conj(_v[i]) * u.head(i+1);
}
}
};
@@ -45,9 +44,52 @@ struct rank2_update_selector<Scalar,Index,Lower>
for (Index i=0; i<size; ++i)
{
- Map<PlainVector>(mat+(stride+1)*i, size-i) +=
- conj(alpha) * conj(_u[i]) * v.tail(size-i)
- + alpha * conj(_v[i]) * u.tail(size-i);
+ Map<PlainVector>(mat+(stride+1)*i, size-i) += conj(alpha) * conj(_u[i]) * v.tail(size-i)
+ + alpha * conj(_v[i]) * u.tail(size-i);
+ }
+ }
+};
+
+/* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu'
+ * The matrix is in packed form.
+ */
+template<typename Scalar, typename Index, int UpLo>
+struct packed_rank2_update_selector;
+
+template<typename Scalar, typename Index>
+struct packed_rank2_update_selector<Scalar,Index,Upper>
+{
+  /* Rank-2 update of the upper triangle of a packed matrix.
+   * Column j occupies j+1 consecutive entries of mat starting at index
+   * j*(j+1)/2, so the diagonal entry is the last entry of each column.
+   */
+  static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha)
+  {
+    typedef Matrix<Scalar,Dynamic,1> ColumnVector;
+    Map<const ColumnVector> u(_u, size);
+    Map<const ColumnVector> v(_v, size);
+
+    Index colStart = 0;
+    for (Index j=0; j<size; ++j)
+    {
+      // Running sum 0+1+...+j: first packed index of column j.
+      colStart += j;
+      const Index len = j+1;
+
+      Map<ColumnVector> column(mat+colStart, len);
+      column += conj(alpha) * conj(_u[j]) * v.head(len)
+              + alpha * conj(_v[j]) * u.head(len);
+
+      // Keep only the real part of the diagonal entry.
+      mat[colStart+j] = real(mat[colStart+j]);
+    }
+  }
+};
+
+template<typename Scalar, typename Index>
+struct packed_rank2_update_selector<Scalar,Index,Lower>
+{
+  /* Rank-2 update of the lower triangle of a packed matrix.
+   * Column j occupies size-j consecutive entries of mat and starts with its
+   * diagonal entry; columns are stored back to back.
+   */
+  static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha)
+  {
+    typedef Matrix<Scalar,Dynamic,1> ColumnVector;
+    Map<const ColumnVector> u(_u, size);
+    Map<const ColumnVector> v(_v, size);
+
+    Index colStart = 0;
+    for (Index j=0; j<size; ++j)
+    {
+      const Index len = size-j;
+
+      Map<ColumnVector> column(mat+colStart, len);
+      column += conj(alpha) * conj(_u[j]) * v.tail(len)
+              + alpha * conj(_v[j]) * u.tail(len);
+
+      // The diagonal entry is the first one of the column; keep only its real part.
+      mat[colStart] = real(mat[colStart]);
+
+      // Advance past this column to the start of the next one.
+      colStart += len;
    }
  }
};
diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h
index 8ab3cb638..46bddc134 100644
--- a/blas/level2_cplx_impl.h
+++ b/blas/level2_cplx_impl.h
@@ -120,10 +120,53 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa
* where alpha is a scalar, x and y are n element vectors and A is an
* n by n hermitian matrix, supplied in packed form.
*/
-// int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap)
-// {
-// return 1;
-// }
+int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap)
+{
+  // Packed rank-2 update kernels, indexed by the UP / LO storage code.
+  typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar);
+  static functype func[2];
+
+  static bool init = false;
+  if(!init)
+  {
+    for(int k=0; k<2; ++k)
+      func[k] = 0;
+
+    func[UP] = (internal::packed_rank2_update_selector<Scalar,int,Upper>::run);
+    func[LO] = (internal::packed_rank2_update_selector<Scalar,int,Lower>::run);
+
+    init = true;
+  }
+
+  // The interface passes every array as RealScalar*; view them as Scalar.
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar* ap = reinterpret_cast<Scalar*>(pap);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  // Argument validation; info is the 1-based position of the offending argument.
+  int info = 0;
+  if(UPLO(*uplo)==INVALID) info = 1;
+  else if(*n<0) info = 2;
+  else if(*incx==0) info = 5;
+  else if(*incy==0) info = 7;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"HPR2 ",&info,6);
+
+  // Nothing to add when alpha is zero.
+  if(alpha==Scalar(0))
+    return 1;
+
+  // Resolve the kernel before any compact copy is made: bailing out after
+  // get_compact_vector() would leak the buffers it may have allocated.
+  int code = UPLO(*uplo);
+  if(code>=2 || func[code]==0)
+    return 0;
+
+  // get_compact_vector() may hand back a fresh array instead of the input
+  // pointer; such copies are released below.
+  Scalar* x_cpy = get_compact_vector(x, *n, *incx);
+  Scalar* y_cpy = get_compact_vector(y, *n, *incy);
+
+  func[code](*n, ap, x_cpy, y_cpy, alpha);
+
+  if(x_cpy!=x) delete[] x_cpy;
+  if(y_cpy!=y) delete[] y_cpy;
+
+  return 1;
+}
/** ZHER performs the hermitian rank 1 operation
*
diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h
index e2575d30a..ca4469d7a 100644
--- a/blas/level2_real_impl.h
+++ b/blas/level2_real_impl.h
@@ -243,10 +243,53 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
* where alpha is a scalar, x and y are n element vectors and A is an
* n by n symmetric matrix, supplied in packed form.
*/
-// int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap)
-// {
-// return 1;
-// }
+int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap)
+{
+  // Packed rank-2 update kernels, indexed by the UP / LO storage code.
+  typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar);
+  static functype func[2];
+
+  static bool init = false;
+  if(!init)
+  {
+    for(int k=0; k<2; ++k)
+      func[k] = 0;
+
+    func[UP] = (internal::packed_rank2_update_selector<Scalar,int,Upper>::run);
+    func[LO] = (internal::packed_rank2_update_selector<Scalar,int,Lower>::run);
+
+    init = true;
+  }
+
+  // The interface passes every array as RealScalar*; view them as Scalar.
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar* ap = reinterpret_cast<Scalar*>(pap);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+
+  // Argument validation; info is the 1-based position of the offending argument.
+  int info = 0;
+  if(UPLO(*uplo)==INVALID) info = 1;
+  else if(*n<0) info = 2;
+  else if(*incx==0) info = 5;
+  else if(*incy==0) info = 7;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"SPR2 ",&info,6);
+
+  // Nothing to add when alpha is zero.
+  if(alpha==Scalar(0))
+    return 1;
+
+  // Resolve the kernel before any compact copy is made: bailing out after
+  // get_compact_vector() would leak the buffers it may have allocated.
+  int code = UPLO(*uplo);
+  if(code>=2 || func[code]==0)
+    return 0;
+
+  // get_compact_vector() may hand back a fresh array instead of the input
+  // pointer; such copies are released below.
+  Scalar* x_cpy = get_compact_vector(x, *n, *incx);
+  Scalar* y_cpy = get_compact_vector(y, *n, *incy);
+
+  func[code](*n, ap, x_cpy, y_cpy, alpha);
+
+  if(x_cpy!=x) delete[] x_cpy;
+  if(y_cpy!=y) delete[] y_cpy;
+
+  return 1;
+}
/** DGER performs the rank 1 operation
*