From e4e7585a24a4ef08742b4c198ab6e37e93eececf Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 8 Sep 2012 17:29:44 +0800 Subject: Implement rank-2 update for packed matrices. --- blas/CMakeLists.txt | 8 ++++---- blas/Rank2Update.h | 54 +++++++++++++++++++++++++++++++++++++++++++------ blas/level2_cplx_impl.h | 51 ++++++++++++++++++++++++++++++++++++++++++---- blas/level2_real_impl.h | 51 ++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 146 insertions(+), 18 deletions(-) (limited to 'blas') diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index 453d5874c..e46fde4d4 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -18,10 +18,10 @@ if(EIGEN_Fortran_COMPILER_WORKS) set(EigenBlas_SRCS ${EigenBlas_SRCS} complexdots.f srotm.f srotmg.f drotm.f drotmg.f - lsame.f chpr2.f dspmv.f dtpsv.f ssbmv.f sspr.f stpmv.f - zhpr2.f chbmv.f chpr.f ctpmv.f dspr2.f sspmv.f stpsv.f - zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dspr.f dtpmv.f sspr2.f - zhpmv.f ztpsv.f + lsame.f dspmv.f dtpsv.f ssbmv.f sspr.f stpmv.f + chbmv.f chpr.f ctpmv.f sspmv.f stpsv.f + zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dspr.f dtpmv.f + zhpmv.f ztpsv.f dtbmv.f stbmv.f ctbmv.f ztbmv.f ) else() diff --git a/blas/Rank2Update.h b/blas/Rank2Update.h index e7a5eeaba..0cf3a1961 100644 --- a/blas/Rank2Update.h +++ b/blas/Rank2Update.h @@ -28,9 +28,8 @@ struct rank2_update_selector for (Index i=0; i(mat+stride*i, i+1) += - conj(alpha) * conj(_u[i]) * v.head(i+1) - + alpha * conj(_v[i]) * u.head(i+1); + Map(mat+stride*i, i+1) += conj(alpha) * conj(_u[i]) * v.head(i+1) + + alpha * conj(_v[i]) * u.head(i+1); } } }; @@ -45,9 +44,52 @@ struct rank2_update_selector for (Index i=0; i(mat+(stride+1)*i, size-i) += - conj(alpha) * conj(_u[i]) * v.tail(size-i) - + alpha * conj(_v[i]) * u.tail(size-i); + Map(mat+(stride+1)*i, size-i) += conj(alpha) * conj(_u[i]) * v.tail(size-i) + + alpha * conj(_v[i]) * u.tail(size-i); + } + } +}; + +/* Optimized selfadjoint matrix += alpha * uv' + 
conj(alpha)*vu' + * The matrix is in packed form. + */ +template +struct packed_rank2_update_selector; + +template +struct packed_rank2_update_selector +{ + static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha) + { + typedef Matrix PlainVector; + Map u(_u, size), v(_v, size); + Index offset = 0; + + for (Index i=0; i(mat+offset, i+1) += conj(alpha) * conj(_u[i]) * v.head(i+1) + + alpha * conj(_v[i]) * u.head(i+1); + mat[offset+i] = real(mat[offset+i]); + } + } +}; + +template +struct packed_rank2_update_selector +{ + static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha) + { + typedef Matrix PlainVector; + Map u(_u, size), v(_v, size); + Index offset = 0; + + for (Index i=0; i(mat+offset, size-i) += conj(alpha) * conj(_u[i]) * v.tail(size-i) + + alpha * conj(_v[i]) * u.tail(size-i); + mat[offset] = real(mat[offset]); + offset += size-i; } } }; diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 8ab3cb638..46bddc134 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -120,10 +120,53 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa * where alpha is a scalar, x and y are n element vectors and A is an * n by n hermitian matrix, supplied in packed form. 
*/
-// int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap)
-// {
-//   return 1;
-// }
+int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap)
+{
+  typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar);
+  static functype func[2];
+  // Fill the UPLO-indexed kernel table on the first call only.
+  static bool init = false;
+  if(!init)
+  {
+    for(int k=0; k<2; ++k)
+      func[k] = 0;
+
+    func[UP] = (internal::packed_rank2_update_selector<Scalar,int,Upper>::run);
+    func[LO] = (internal::packed_rank2_update_selector<Scalar,int,Lower>::run);
+
+    init = true;
+  }
+  // View the RealScalar arguments as Scalar data.
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar* ap = reinterpret_cast<Scalar*>(pap);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+  // info is the 1-based position of the first invalid argument, handed to xerbla_.
+  int info = 0;
+  if(UPLO(*uplo)==INVALID) info = 1;
+  else if(*n<0) info = 2;
+  else if(*incx==0) info = 5;
+  else if(*incy==0) info = 7;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"HPR2 ",&info,6);
+  // alpha == 0: nothing to add, ap is left untouched.
+  if(alpha==Scalar(0))
+    return 1;
+
+  // Select the kernel first: bailing out after get_compact_vector() would leak its copies.
+  int code = UPLO(*uplo);
+  if(code>=2 || func[code]==0)
+    return 0;
+
+  Scalar* x_cpy = get_compact_vector(x, *n, *incx);
+  Scalar* y_cpy = get_compact_vector(y, *n, *incy);
+  func[code](*n, ap, x_cpy, y_cpy, alpha);
+
+  if(x_cpy!=x) delete[] x_cpy;
+  if(y_cpy!=y) delete[] y_cpy;
+
+  return 1;
+}
 
 /** ZHER performs the hermitian rank 1 operation
  *
diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h
index e2575d30a..ca4469d7a 100644
--- a/blas/level2_real_impl.h
+++ b/blas/level2_real_impl.h
@@ -243,10 +243,53 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
  * where alpha is a scalar, x and y are n element vectors and A is an
  * n by n symmetric matrix, supplied in packed form.
*/
-// int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap)
-// {
-//   return 1;
-// }
+int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap)
+{
+  typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar);
+  static functype func[2];
+  // Fill the UPLO-indexed kernel table on the first call only.
+  static bool init = false;
+  if(!init)
+  {
+    for(int k=0; k<2; ++k)
+      func[k] = 0;
+
+    func[UP] = (internal::packed_rank2_update_selector<Scalar,int,Upper>::run);
+    func[LO] = (internal::packed_rank2_update_selector<Scalar,int,Lower>::run);
+
+    init = true;
+  }
+  // View the RealScalar arguments as Scalar data.
+  Scalar* x = reinterpret_cast<Scalar*>(px);
+  Scalar* y = reinterpret_cast<Scalar*>(py);
+  Scalar* ap = reinterpret_cast<Scalar*>(pap);
+  Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
+  // info is the 1-based position of the first invalid argument, handed to xerbla_.
+  int info = 0;
+  if(UPLO(*uplo)==INVALID) info = 1;
+  else if(*n<0) info = 2;
+  else if(*incx==0) info = 5;
+  else if(*incy==0) info = 7;
+  if(info)
+    return xerbla_(SCALAR_SUFFIX_UP"SPR2 ",&info,6);
+  // alpha == 0: nothing to add, ap is left untouched.
+  if(alpha==Scalar(0))
+    return 1;
+
+  // Select the kernel first: bailing out after get_compact_vector() would leak its copies.
+  int code = UPLO(*uplo);
+  if(code>=2 || func[code]==0)
+    return 0;
+
+  Scalar* x_cpy = get_compact_vector(x, *n, *incx);
+  Scalar* y_cpy = get_compact_vector(y, *n, *incy);
+  func[code](*n, ap, x_cpy, y_cpy, alpha);
+
+  if(x_cpy!=x) delete[] x_cpy;
+  if(y_cpy!=y) delete[] y_cpy;
+
+  return 1;
+}
 
 /** DGER performs the rank 1 operation
  *
-- 
cgit v1.2.3