From 1b8f4164082f82265f8118d89de71bb14d7f0eb6 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 8 Sep 2012 22:51:40 +0800 Subject: Implement rank-1 update for self-adjoint packed matrices. --- blas/CMakeLists.txt | 4 ++-- blas/SelfadjointPackedProduct.h | 47 +++++++++++++++++++++++++++++++++++++++++ blas/common.h | 1 + blas/level2_real_impl.h | 47 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 blas/SelfadjointPackedProduct.h (limited to 'blas') diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index e46fde4d4..7ee6f0591 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -18,9 +18,9 @@ if(EIGEN_Fortran_COMPILER_WORKS) set(EigenBlas_SRCS ${EigenBlas_SRCS} complexdots.f srotm.f srotmg.f drotm.f drotmg.f - lsame.f dspmv.f dtpsv.f ssbmv.f sspr.f stpmv.f + lsame.f dspmv.f dtpsv.f ssbmv.f stpmv.f chbmv.f chpr.f ctpmv.f sspmv.f stpsv.f - zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dspr.f dtpmv.f + zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dtpmv.f zhpmv.f ztpsv.f dtbmv.f stbmv.f ctbmv.f ztbmv.f ) diff --git a/blas/SelfadjointPackedProduct.h b/blas/SelfadjointPackedProduct.h new file mode 100644 index 000000000..4ea36b569 --- /dev/null +++ b/blas/SelfadjointPackedProduct.h @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELFADJOINT_PACKED_PRODUCT_H +#define EIGEN_SELFADJOINT_PACKED_PRODUCT_H + +namespace internal { + +/* Optimized matrix += alpha * uv' + * The matrix is in packed form. + * + * FIXME I always fail tests for complex self-adjoint matrices. + * + ******* FATAL ERROR - PARAMETER NUMBER 6 WAS CHANGED INCORRECTLY ******* + ******* xHPR FAILED ON CALL NUMBER: + 2: xHPR ('U', 1, 0.0, X, 1, AP) + */ +template +struct selfadjoint_packed_rank1_update +{ + static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) + { + typedef Map > OtherMap; + Index offset = 0; + + for (Index i=0; i >(mat+offset, UpLo==Lower ? size-i : (i+1)) + += alpha * conj(vec[i]) * OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)); + //FIXME This should be handled outside. + mat[offset+(UpLo==Lower ? 0 : i)] = real(mat[offset+(UpLo==Lower ? 0 : i)]); + offset += UpLo==Lower ? size-i : (i+1); + } + } +}; + +//TODO struct selfadjoint_packed_product_selector + +} // end namespace internal + +#endif // EIGEN_SELFADJOINT_PACKED_PRODUCT_H diff --git a/blas/common.h b/blas/common.h index 26b4ed5a3..a14d32289 100644 --- a/blas/common.h +++ b/blas/common.h @@ -76,6 +76,7 @@ namespace Eigen { #include "BandTriangularSolver.h" #include "GeneralRank1Update.h" #include "Rank2Update.h" +#include "SelfadjointPackedProduct.h" } using namespace Eigen; diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index ca4469d7a..735545e2b 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -231,10 +231,49 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px * where alpha is a real scalar, x is an n element vector and A is an * n by n symmetric matrix, supplied in packed form. */ -// int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *alpha, Scalar *x, int *incx, Scalar *ap) -// { -// return 1; -// } +int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap) +{ + typedef void (*functype)(int, Scalar*, const Scalar*, Scalar); + static functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + + func[UP] = (internal::selfadjoint_packed_rank1_update::run); + func[LO] = (internal::selfadjoint_packed_rank1_update::run); + + init = true; + } + + Scalar* x = reinterpret_cast(px); + Scalar* ap = reinterpret_cast(pap); + Scalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + if(info) + return xerbla_(SCALAR_SUFFIX_UP"SPR ",&info,6); + + if(alpha==Scalar(0)) + return 1; + + Scalar* x_cpy = get_compact_vector(x, *n, *incx); + + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; + + func[code](*n, ap, x_cpy, alpha); + + if(x_cpy!=x) delete[] x_cpy; + + return 1; +} /** DSPR2 performs the symmetric rank 2 operation * -- cgit v1.2.3