From b5f9bec8ac2771abbd74e7abd03734d578a88e80 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 8 Sep 2012 15:47:33 +0800 Subject: Perform direct calls in xHEMV and xSYMV. --- blas/level2_cplx_impl.h | 23 ++++++++++++++++++++--- blas/level2_real_impl.h | 23 ++++++++++++++++++++--- 2 files changed, 40 insertions(+), 6 deletions(-) (limited to 'blas') diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 477f6d649..8ab3cb638 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -18,6 +18,21 @@ */ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { + typedef void (*functype)(int, const Scalar*, int, const Scalar*, int, Scalar*, Scalar); + static functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + + func[UP] = (internal::selfadjoint_matrix_vector_product::run); + func[LO] = (internal::selfadjoint_matrix_vector_product::run); + + init = true; + } + Scalar* a = reinterpret_cast(pa); Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); @@ -48,9 +63,11 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa if(alpha!=Scalar(0)) { - // TODO performs a direct call to the underlying implementation function - if(UPLO(*uplo)==UP) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView() * (alpha * vector(actual_x,*n)); - else if(UPLO(*uplo)==LO) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView() * (alpha * vector(actual_x,*n)); + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; + + func[code](*n, a, *lda, actual_x, 1, actual_y, alpha); } if(actual_x!=x) delete[] actual_x; diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index 06da283d3..e2575d30a 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -12,6 +12,21 @@ // y = alpha*A*x + beta*y int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { + typedef void (*functype)(int, const Scalar*, int, const Scalar*, int, Scalar*, Scalar); + static functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + + func[UP] = (internal::selfadjoint_matrix_vector_product::run); + func[LO] = (internal::selfadjoint_matrix_vector_product::run); + + init = true; + } + Scalar* a = reinterpret_cast(pa); Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); @@ -40,9 +55,11 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p else vector(actual_y, *n) *= beta; } - // TODO performs a direct call to the underlying implementation function - if(UPLO(*uplo)==UP) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView() * (alpha * vector(actual_x,*n)); - else if(UPLO(*uplo)==LO) vector(actual_y,*n).noalias() += matrix(a,*n,*n,*lda).selfadjointView() * (alpha * vector(actual_x,*n)); + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; + + func[code](*n, a, *lda, actual_x, 1, actual_y, alpha); if(actual_x!=x) delete[] actual_x; if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy); -- cgit v1.2.3 From e4e7585a24a4ef08742b4c198ab6e37e93eececf Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 8 Sep 2012 17:29:44 +0800 Subject: Implement rank-2 update for packed matrices. --- blas/CMakeLists.txt | 8 ++++---- blas/Rank2Update.h | 54 +++++++++++++++++++++++++++++++++++++++++++------ blas/level2_cplx_impl.h | 51 ++++++++++++++++++++++++++++++++++++++++++---- blas/level2_real_impl.h | 51 ++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 146 insertions(+), 18 deletions(-) (limited to 'blas') diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index 453d5874c..e46fde4d4 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -18,10 +18,10 @@ if(EIGEN_Fortran_COMPILER_WORKS) set(EigenBlas_SRCS ${EigenBlas_SRCS} complexdots.f srotm.f srotmg.f drotm.f drotmg.f - lsame.f chpr2.f dspmv.f dtpsv.f ssbmv.f sspr.f stpmv.f - zhpr2.f chbmv.f chpr.f ctpmv.f dspr2.f sspmv.f stpsv.f - zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dspr.f dtpmv.f sspr2.f - zhpmv.f ztpsv.f + lsame.f dspmv.f dtpsv.f ssbmv.f sspr.f stpmv.f + chbmv.f chpr.f ctpmv.f sspmv.f stpsv.f + zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dspr.f dtpmv.f + zhpmv.f ztpsv.f dtbmv.f stbmv.f ctbmv.f ztbmv.f ) else() diff --git a/blas/Rank2Update.h b/blas/Rank2Update.h index e7a5eeaba..0cf3a1961 100644 --- a/blas/Rank2Update.h +++ b/blas/Rank2Update.h @@ -28,9 +28,8 @@ struct rank2_update_selector for (Index i=0; i(mat+stride*i, i+1) += - conj(alpha) * conj(_u[i]) * v.head(i+1) - + alpha * conj(_v[i]) * u.head(i+1); + Map(mat+stride*i, i+1) += conj(alpha) * conj(_u[i]) * v.head(i+1) + + alpha * conj(_v[i]) * u.head(i+1); } } }; @@ -45,9 +44,52 @@ struct rank2_update_selector for (Index i=0; i(mat+(stride+1)*i, size-i) += - conj(alpha) * conj(_u[i]) * v.tail(size-i) - + alpha * conj(_v[i]) * u.tail(size-i); + Map(mat+(stride+1)*i, size-i) += conj(alpha) * conj(_u[i]) * v.tail(size-i) + + alpha * conj(_v[i]) * u.tail(size-i); + } + } +}; + +/* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu' + * The matrix is in packed form. + */ +template +struct packed_rank2_update_selector; + +template +struct packed_rank2_update_selector +{ + static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha) + { + typedef Matrix PlainVector; + Map u(_u, size), v(_v, size); + Index offset = 0; + + for (Index i=0; i(mat+offset, i+1) += conj(alpha) * conj(_u[i]) * v.head(i+1) + + alpha * conj(_v[i]) * u.head(i+1); + mat[offset+i] = real(mat[offset+i]); + } + } +}; + +template +struct packed_rank2_update_selector +{ + static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha) + { + typedef Matrix PlainVector; + Map u(_u, size), v(_v, size); + Index offset = 0; + + for (Index i=0; i(mat+offset, size-i) += conj(alpha) * conj(_u[i]) * v.tail(size-i) + + alpha * conj(_v[i]) * u.tail(size-i); + mat[offset] = real(mat[offset]); + offset += size-i; } } }; diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 8ab3cb638..46bddc134 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -120,10 +120,53 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa * where alpha is a scalar, x and y are n element vectors and A is an * n by n hermitian matrix, supplied in packed form. */ -// int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap) -// { -// return 1; -// } +int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) +{ + typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar); + static functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + + func[UP] = (internal::packed_rank2_update_selector::run); + func[LO] = (internal::packed_rank2_update_selector::run); + + init = true; + } + + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar* ap = reinterpret_cast(pap); + Scalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + else if(*incy==0) info = 7; + if(info) + return xerbla_(SCALAR_SUFFIX_UP"HPR2 ",&info,6); + + if(alpha==Scalar(0)) + return 1; + + Scalar* x_cpy = get_compact_vector(x, *n, *incx); + Scalar* y_cpy = get_compact_vector(y, *n, *incy); + + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; + + func[code](*n, ap, x_cpy, y_cpy, alpha); + + if(x_cpy!=x) delete[] x_cpy; + if(y_cpy!=y) delete[] y_cpy; + + return 1; +} /** ZHER performs the hermitian rank 1 operation * diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index e2575d30a..ca4469d7a 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -243,10 +243,53 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px * where alpha is a scalar, x and y are n element vectors and A is an * n by n symmetric matrix, supplied in packed form. */ -// int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *y, int *incy, RealScalar *ap) -// { -// return 1; -// } +int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) +{ + typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar); + static functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + + func[UP] = (internal::packed_rank2_update_selector::run); + func[LO] = (internal::packed_rank2_update_selector::run); + + init = true; + } + + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar* ap = reinterpret_cast(pap); + Scalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + else if(*incy==0) info = 7; + if(info) + return xerbla_(SCALAR_SUFFIX_UP"SPR2 ",&info,6); + + if(alpha==Scalar(0)) + return 1; + + Scalar* x_cpy = get_compact_vector(x, *n, *incx); + Scalar* y_cpy = get_compact_vector(y, *n, *incy); + + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; + + func[code](*n, ap, x_cpy, y_cpy, alpha); + + if(x_cpy!=x) delete[] x_cpy; + if(y_cpy!=y) delete[] y_cpy; + + return 1; +} /** DGER performs the rank 1 operation * -- cgit v1.2.3 From 17c746523e2cfc106ed8213cb1690149ee713ea9 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 8 Sep 2012 21:25:09 +0800 Subject: Comment FIXMEs on Rank2Update.h and remove unused files. --- blas/Rank2Update.h | 40 ++++----- blas/chpr2.f | 255 ----------------------------------------------------- blas/dspr2.f | 233 ------------------------------------------------ blas/sspr2.f | 233 ------------------------------------------------ blas/zhpr2.f | 255 ----------------------------------------------------- 5 files changed, 19 insertions(+), 997 deletions(-) delete mode 100644 blas/chpr2.f delete mode 100644 blas/dspr2.f delete mode 100644 blas/sspr2.f delete mode 100644 blas/zhpr2.f (limited to 'blas') diff --git a/blas/Rank2Update.h b/blas/Rank2Update.h index 0cf3a1961..2767916e2 100644 --- a/blas/Rank2Update.h +++ b/blas/Rank2Update.h @@ -23,13 +23,12 @@ struct rank2_update_selector { static void run(Index size, Scalar* mat, Index stride, const Scalar* _u, const Scalar* _v, Scalar alpha) { - typedef Matrix PlainVector; - Map u(_u, size), v(_v, size); - + Map > u(_u, size), v(_v, size); for (Index i=0; i(mat+stride*i, i+1) += conj(alpha) * conj(_u[i]) * v.head(i+1) - + alpha * conj(_v[i]) * u.head(i+1); + Map >(mat+stride*i, i+1) += + conj(alpha) * conj(_u[i]) * v.head(i+1) + + alpha * conj(_v[i]) * u.head(i+1); } } }; @@ -39,13 +38,12 @@ struct rank2_update_selector { static void run(Index size, Scalar* mat, Index stride, const Scalar* _u, const Scalar* _v, Scalar alpha) { - typedef Matrix PlainVector; - Map u(_u, size), v(_v, size); - + Map > u(_u, size), v(_v, size); for (Index i=0; i(mat+(stride+1)*i, size-i) += conj(alpha) * conj(_u[i]) * v.tail(size-i) - + alpha * conj(_v[i]) * u.tail(size-i); + Map >(mat+(stride+1)*i, size-i) += + conj(alpha) * conj(_u[i]) * v.tail(size-i) + + alpha * conj(_v[i]) * u.tail(size-i); } } }; @@ -61,16 +59,16 @@ struct packed_rank2_update_selector { static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha) { - typedef Matrix PlainVector; - Map u(_u, size), v(_v, size); + Map > u(_u, size), v(_v, size); Index offset = 0; - for (Index i=0; i(mat+offset, i+1) += conj(alpha) * conj(_u[i]) * v.head(i+1) - + alpha * conj(_v[i]) * u.head(i+1); + Map >(mat+offset, i+1) += + conj(alpha) * conj(_u[i]) * v.head(i+1) + + alpha * conj(_v[i]) * u.head(i+1); + //FIXME This should be handled outside. mat[offset+i] = real(mat[offset+i]); + offset += i+1; } } }; @@ -80,14 +78,14 @@ struct packed_rank2_update_selector { static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha) { - typedef Matrix PlainVector; - Map u(_u, size), v(_v, size); + Map > u(_u, size), v(_v, size); Index offset = 0; - for (Index i=0; i(mat+offset, size-i) += conj(alpha) * conj(_u[i]) * v.tail(size-i) - + alpha * conj(_v[i]) * u.tail(size-i); + Map >(mat+offset, size-i) += + conj(alpha) * conj(_u[i]) * v.tail(size-i) + + alpha * conj(_v[i]) * u.tail(size-i); + //FIXME This should be handled outside. mat[offset] = real(mat[offset]); offset += size-i; } diff --git a/blas/chpr2.f b/blas/chpr2.f deleted file mode 100644 index a0020ef3e..000000000 --- a/blas/chpr2.f +++ /dev/null @@ -1,255 +0,0 @@ - SUBROUTINE CHPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP) -* .. Scalar Arguments .. - COMPLEX ALPHA - INTEGER INCX,INCY,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - COMPLEX AP(*),X(*),Y(*) -* .. -* -* Purpose -* ======= -* -* CHPR2 performs the hermitian rank 2 operation -* -* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, -* -* where alpha is a scalar, x and y are n element vectors and A is an -* n by n hermitian matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - COMPLEX . -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* X - COMPLEX array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Y - COMPLEX array of dimension at least -* ( 1 + ( n - 1 )*abs( INCY ) ). -* Before entry, the incremented array Y must contain the n -* element vector y. -* Unchanged on exit. -* -* INCY - INTEGER. -* On entry, INCY specifies the increment for the elements of -* Y. INCY must not be zero. -* Unchanged on exit. -* -* AP - COMPLEX array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. On exit, the array -* AP is overwritten by the upper triangular part of the -* updated matrix. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. On exit, the array -* AP is overwritten by the lower triangular part of the -* updated matrix. -* Note that the imaginary parts of the diagonal elements need -* not be set, they are assumed to be zero, and on exit they -* are set to zero. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - COMPLEX ZERO - PARAMETER (ZERO= (0.0E+0,0.0E+0)) -* .. -* .. Local Scalars .. - COMPLEX TEMP1,TEMP2 - INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC CONJG,REAL -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 5 - ELSE IF (INCY.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('CHPR2 ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN -* -* Set up the start points in X and Y if the increments are not both -* unity. -* - IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN - IF (INCX.GT.0) THEN - KX = 1 - ELSE - KX = 1 - (N-1)*INCX - END IF - IF (INCY.GT.0) THEN - KY = 1 - ELSE - KY = 1 - (N-1)*INCY - END IF - JX = KX - JY = KY - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form A when upper triangle is stored in AP. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 20 J = 1,N - IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN - TEMP1 = ALPHA*CONJG(Y(J)) - TEMP2 = CONJG(ALPHA*X(J)) - K = KK - DO 10 I = 1,J - 1 - AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2 - K = K + 1 - 10 CONTINUE - AP(KK+J-1) = REAL(AP(KK+J-1)) + - + REAL(X(J)*TEMP1+Y(J)*TEMP2) - ELSE - AP(KK+J-1) = REAL(AP(KK+J-1)) - END IF - KK = KK + J - 20 CONTINUE - ELSE - DO 40 J = 1,N - IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN - TEMP1 = ALPHA*CONJG(Y(JY)) - TEMP2 = CONJG(ALPHA*X(JX)) - IX = KX - IY = KY - DO 30 K = KK,KK + J - 2 - AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2 - IX = IX + INCX - IY = IY + INCY - 30 CONTINUE - AP(KK+J-1) = REAL(AP(KK+J-1)) + - + REAL(X(JX)*TEMP1+Y(JY)*TEMP2) - ELSE - AP(KK+J-1) = REAL(AP(KK+J-1)) - END IF - JX = JX + INCX - JY = JY + INCY - KK = KK + J - 40 CONTINUE - END IF - ELSE -* -* Form A when lower triangle is stored in AP. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 60 J = 1,N - IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN - TEMP1 = ALPHA*CONJG(Y(J)) - TEMP2 = CONJG(ALPHA*X(J)) - AP(KK) = REAL(AP(KK)) + - + REAL(X(J)*TEMP1+Y(J)*TEMP2) - K = KK + 1 - DO 50 I = J + 1,N - AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2 - K = K + 1 - 50 CONTINUE - ELSE - AP(KK) = REAL(AP(KK)) - END IF - KK = KK + N - J + 1 - 60 CONTINUE - ELSE - DO 80 J = 1,N - IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN - TEMP1 = ALPHA*CONJG(Y(JY)) - TEMP2 = CONJG(ALPHA*X(JX)) - AP(KK) = REAL(AP(KK)) + - + REAL(X(JX)*TEMP1+Y(JY)*TEMP2) - IX = JX - IY = JY - DO 70 K = KK + 1,KK + N - J - IX = IX + INCX - IY = IY + INCY - AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2 - 70 CONTINUE - ELSE - AP(KK) = REAL(AP(KK)) - END IF - JX = JX + INCX - JY = JY + INCY - KK = KK + N - J + 1 - 80 CONTINUE - END IF - END IF -* - RETURN -* -* End of CHPR2 . -* - END diff --git a/blas/dspr2.f b/blas/dspr2.f deleted file mode 100644 index 6f6b54a8c..000000000 --- a/blas/dspr2.f +++ /dev/null @@ -1,233 +0,0 @@ - SUBROUTINE DSPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP) -* .. Scalar Arguments .. - DOUBLE PRECISION ALPHA - INTEGER INCX,INCY,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - DOUBLE PRECISION AP(*),X(*),Y(*) -* .. -* -* Purpose -* ======= -* -* DSPR2 performs the symmetric rank 2 operation -* -* A := alpha*x*y' + alpha*y*x' + A, -* -* where alpha is a scalar, x and y are n element vectors and A is an -* n by n symmetric matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - DOUBLE PRECISION. -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* X - DOUBLE PRECISION array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Y - DOUBLE PRECISION array of dimension at least -* ( 1 + ( n - 1 )*abs( INCY ) ). -* Before entry, the incremented array Y must contain the n -* element vector y. -* Unchanged on exit. -* -* INCY - INTEGER. -* On entry, INCY specifies the increment for the elements of -* Y. INCY must not be zero. -* Unchanged on exit. -* -* AP - DOUBLE PRECISION array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the symmetric matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. On exit, the array -* AP is overwritten by the upper triangular part of the -* updated matrix. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the symmetric matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. On exit, the array -* AP is overwritten by the lower triangular part of the -* updated matrix. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE PRECISION ZERO - PARAMETER (ZERO=0.0D+0) -* .. -* .. Local Scalars .. - DOUBLE PRECISION TEMP1,TEMP2 - INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 5 - ELSE IF (INCY.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('DSPR2 ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN -* -* Set up the start points in X and Y if the increments are not both -* unity. -* - IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN - IF (INCX.GT.0) THEN - KX = 1 - ELSE - KX = 1 - (N-1)*INCX - END IF - IF (INCY.GT.0) THEN - KY = 1 - ELSE - KY = 1 - (N-1)*INCY - END IF - JX = KX - JY = KY - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form A when upper triangle is stored in AP. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 20 J = 1,N - IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN - TEMP1 = ALPHA*Y(J) - TEMP2 = ALPHA*X(J) - K = KK - DO 10 I = 1,J - AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2 - K = K + 1 - 10 CONTINUE - END IF - KK = KK + J - 20 CONTINUE - ELSE - DO 40 J = 1,N - IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN - TEMP1 = ALPHA*Y(JY) - TEMP2 = ALPHA*X(JX) - IX = KX - IY = KY - DO 30 K = KK,KK + J - 1 - AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2 - IX = IX + INCX - IY = IY + INCY - 30 CONTINUE - END IF - JX = JX + INCX - JY = JY + INCY - KK = KK + J - 40 CONTINUE - END IF - ELSE -* -* Form A when lower triangle is stored in AP. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 60 J = 1,N - IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN - TEMP1 = ALPHA*Y(J) - TEMP2 = ALPHA*X(J) - K = KK - DO 50 I = J,N - AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2 - K = K + 1 - 50 CONTINUE - END IF - KK = KK + N - J + 1 - 60 CONTINUE - ELSE - DO 80 J = 1,N - IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN - TEMP1 = ALPHA*Y(JY) - TEMP2 = ALPHA*X(JX) - IX = JX - IY = JY - DO 70 K = KK,KK + N - J - AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2 - IX = IX + INCX - IY = IY + INCY - 70 CONTINUE - END IF - JX = JX + INCX - JY = JY + INCY - KK = KK + N - J + 1 - 80 CONTINUE - END IF - END IF -* - RETURN -* -* End of DSPR2 . -* - END diff --git a/blas/sspr2.f b/blas/sspr2.f deleted file mode 100644 index cd27c734b..000000000 --- a/blas/sspr2.f +++ /dev/null @@ -1,233 +0,0 @@ - SUBROUTINE SSPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP) -* .. Scalar Arguments .. - REAL ALPHA - INTEGER INCX,INCY,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - REAL AP(*),X(*),Y(*) -* .. -* -* Purpose -* ======= -* -* SSPR2 performs the symmetric rank 2 operation -* -* A := alpha*x*y' + alpha*y*x' + A, -* -* where alpha is a scalar, x and y are n element vectors and A is an -* n by n symmetric matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - REAL . -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* X - REAL array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Y - REAL array of dimension at least -* ( 1 + ( n - 1 )*abs( INCY ) ). -* Before entry, the incremented array Y must contain the n -* element vector y. -* Unchanged on exit. -* -* INCY - INTEGER. -* On entry, INCY specifies the increment for the elements of -* Y. INCY must not be zero. -* Unchanged on exit. -* -* AP - REAL array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the symmetric matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. On exit, the array -* AP is overwritten by the upper triangular part of the -* updated matrix. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the symmetric matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. On exit, the array -* AP is overwritten by the lower triangular part of the -* updated matrix. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - REAL ZERO - PARAMETER (ZERO=0.0E+0) -* .. -* .. Local Scalars .. - REAL TEMP1,TEMP2 - INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 5 - ELSE IF (INCY.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('SSPR2 ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN -* -* Set up the start points in X and Y if the increments are not both -* unity. -* - IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN - IF (INCX.GT.0) THEN - KX = 1 - ELSE - KX = 1 - (N-1)*INCX - END IF - IF (INCY.GT.0) THEN - KY = 1 - ELSE - KY = 1 - (N-1)*INCY - END IF - JX = KX - JY = KY - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form A when upper triangle is stored in AP. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 20 J = 1,N - IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN - TEMP1 = ALPHA*Y(J) - TEMP2 = ALPHA*X(J) - K = KK - DO 10 I = 1,J - AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2 - K = K + 1 - 10 CONTINUE - END IF - KK = KK + J - 20 CONTINUE - ELSE - DO 40 J = 1,N - IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN - TEMP1 = ALPHA*Y(JY) - TEMP2 = ALPHA*X(JX) - IX = KX - IY = KY - DO 30 K = KK,KK + J - 1 - AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2 - IX = IX + INCX - IY = IY + INCY - 30 CONTINUE - END IF - JX = JX + INCX - JY = JY + INCY - KK = KK + J - 40 CONTINUE - END IF - ELSE -* -* Form A when lower triangle is stored in AP. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 60 J = 1,N - IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN - TEMP1 = ALPHA*Y(J) - TEMP2 = ALPHA*X(J) - K = KK - DO 50 I = J,N - AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2 - K = K + 1 - 50 CONTINUE - END IF - KK = KK + N - J + 1 - 60 CONTINUE - ELSE - DO 80 J = 1,N - IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN - TEMP1 = ALPHA*Y(JY) - TEMP2 = ALPHA*X(JX) - IX = JX - IY = JY - DO 70 K = KK,KK + N - J - AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2 - IX = IX + INCX - IY = IY + INCY - 70 CONTINUE - END IF - JX = JX + INCX - JY = JY + INCY - KK = KK + N - J + 1 - 80 CONTINUE - END IF - END IF -* - RETURN -* -* End of SSPR2 . -* - END diff --git a/blas/zhpr2.f b/blas/zhpr2.f deleted file mode 100644 index 99977462e..000000000 --- a/blas/zhpr2.f +++ /dev/null @@ -1,255 +0,0 @@ - SUBROUTINE ZHPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP) -* .. Scalar Arguments .. - DOUBLE COMPLEX ALPHA - INTEGER INCX,INCY,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - DOUBLE COMPLEX AP(*),X(*),Y(*) -* .. -* -* Purpose -* ======= -* -* ZHPR2 performs the hermitian rank 2 operation -* -* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, -* -* where alpha is a scalar, x and y are n element vectors and A is an -* n by n hermitian matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - COMPLEX*16 . -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* X - COMPLEX*16 array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Y - COMPLEX*16 array of dimension at least -* ( 1 + ( n - 1 )*abs( INCY ) ). -* Before entry, the incremented array Y must contain the n -* element vector y. -* Unchanged on exit. -* -* INCY - INTEGER. -* On entry, INCY specifies the increment for the elements of -* Y. INCY must not be zero. -* Unchanged on exit. -* -* AP - COMPLEX*16 array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. On exit, the array -* AP is overwritten by the upper triangular part of the -* updated matrix. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. On exit, the array -* AP is overwritten by the lower triangular part of the -* updated matrix. -* Note that the imaginary parts of the diagonal elements need -* not be set, they are assumed to be zero, and on exit they -* are set to zero. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE COMPLEX ZERO - PARAMETER (ZERO= (0.0D+0,0.0D+0)) -* .. -* .. Local Scalars .. - DOUBLE COMPLEX TEMP1,TEMP2 - INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC DBLE,DCONJG -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 5 - ELSE IF (INCY.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('ZHPR2 ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN -* -* Set up the start points in X and Y if the increments are not both -* unity. -* - IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN - IF (INCX.GT.0) THEN - KX = 1 - ELSE - KX = 1 - (N-1)*INCX - END IF - IF (INCY.GT.0) THEN - KY = 1 - ELSE - KY = 1 - (N-1)*INCY - END IF - JX = KX - JY = KY - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form A when upper triangle is stored in AP. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 20 J = 1,N - IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN - TEMP1 = ALPHA*DCONJG(Y(J)) - TEMP2 = DCONJG(ALPHA*X(J)) - K = KK - DO 10 I = 1,J - 1 - AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2 - K = K + 1 - 10 CONTINUE - AP(KK+J-1) = DBLE(AP(KK+J-1)) + - + DBLE(X(J)*TEMP1+Y(J)*TEMP2) - ELSE - AP(KK+J-1) = DBLE(AP(KK+J-1)) - END IF - KK = KK + J - 20 CONTINUE - ELSE - DO 40 J = 1,N - IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN - TEMP1 = ALPHA*DCONJG(Y(JY)) - TEMP2 = DCONJG(ALPHA*X(JX)) - IX = KX - IY = KY - DO 30 K = KK,KK + J - 2 - AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2 - IX = IX + INCX - IY = IY + INCY - 30 CONTINUE - AP(KK+J-1) = DBLE(AP(KK+J-1)) + - + DBLE(X(JX)*TEMP1+Y(JY)*TEMP2) - ELSE - AP(KK+J-1) = DBLE(AP(KK+J-1)) - END IF - JX = JX + INCX - JY = JY + INCY - KK = KK + J - 40 CONTINUE - END IF - ELSE -* -* Form A when lower triangle is stored in AP. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 60 J = 1,N - IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN - TEMP1 = ALPHA*DCONJG(Y(J)) - TEMP2 = DCONJG(ALPHA*X(J)) - AP(KK) = DBLE(AP(KK)) + - + DBLE(X(J)*TEMP1+Y(J)*TEMP2) - K = KK + 1 - DO 50 I = J + 1,N - AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2 - K = K + 1 - 50 CONTINUE - ELSE - AP(KK) = DBLE(AP(KK)) - END IF - KK = KK + N - J + 1 - 60 CONTINUE - ELSE - DO 80 J = 1,N - IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN - TEMP1 = ALPHA*DCONJG(Y(JY)) - TEMP2 = DCONJG(ALPHA*X(JX)) - AP(KK) = DBLE(AP(KK)) + - + DBLE(X(JX)*TEMP1+Y(JY)*TEMP2) - IX = JX - IY = JY - DO 70 K = KK + 1,KK + N - J - IX = IX + INCX - IY = IY + INCY - AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2 - 70 CONTINUE - ELSE - AP(KK) = DBLE(AP(KK)) - END IF - JX = JX + INCX - JY = JY + INCY - KK = KK + N - J + 1 - 80 CONTINUE - END IF - END IF -* - RETURN -* -* End of ZHPR2 . -* - END -- cgit v1.2.3 From dac5a8a37dd0e6c4b9e79a3d48d276787f06d8c3 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 8 Sep 2012 22:20:05 +0800 Subject: Simplify Rank2Update.h --- blas/Rank2Update.h | 68 +++++++++++------------------------------------------- 1 file changed, 14 insertions(+), 54 deletions(-) (limited to 'blas') diff --git a/blas/Rank2Update.h b/blas/Rank2Update.h index 2767916e2..8e1a676ee 100644 --- a/blas/Rank2Update.h +++ b/blas/Rank2Update.h @@ -16,34 +16,16 @@ namespace internal { * This is the low-level version of SelfadjointRank2Update.h */ template -struct rank2_update_selector; - -template -struct rank2_update_selector -{ - static void run(Index size, Scalar* mat, Index stride, const Scalar* _u, const Scalar* _v, Scalar alpha) - { - Map > u(_u, size), v(_v, size); - for (Index i=0; i >(mat+stride*i, i+1) += - conj(alpha) * conj(_u[i]) * v.head(i+1) - + alpha * conj(_v[i]) * u.head(i+1); - } - } -}; - -template -struct rank2_update_selector +struct rank2_update_selector { - static void run(Index size, Scalar* mat, Index stride, const Scalar* _u, const Scalar* _v, Scalar alpha) + static void run(Index size, Scalar* mat, Index stride, const Scalar* u, const Scalar* v, Scalar alpha) { - Map > u(_u, size), v(_v, size); + typedef Map > OtherMap; for (Index i=0; i >(mat+(stride+1)*i, size-i) += - conj(alpha) * conj(_u[i]) * v.tail(size-i) - + alpha * conj(_v[i]) * u.tail(size-i); + Map >(mat+stride*i+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)) += + conj(alpha) * conj(u[i]) * OtherMap(v+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)) + + alpha * conj(v[i]) * OtherMap(u+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)); } } }; @@ -52,42 +34,20 @@ struct rank2_update_selector * The matrix is in packed form. */ template -struct packed_rank2_update_selector; - -template -struct packed_rank2_update_selector -{ - static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha) - { - Map > u(_u, size), v(_v, size); - Index offset = 0; - for (Index i=0; i >(mat+offset, i+1) += - conj(alpha) * conj(_u[i]) * v.head(i+1) - + alpha * conj(_v[i]) * u.head(i+1); - //FIXME This should be handled outside. - mat[offset+i] = real(mat[offset+i]); - offset += i+1; - } - } -}; - -template -struct packed_rank2_update_selector +struct packed_rank2_update_selector { - static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha) + static void run(Index size, Scalar* mat, const Scalar* u, const Scalar* v, Scalar alpha) { - Map > u(_u, size), v(_v, size); + typedef Map > OtherMap; Index offset = 0; for (Index i=0; i >(mat+offset, size-i) += - conj(alpha) * conj(_u[i]) * v.tail(size-i) - + alpha * conj(_v[i]) * u.tail(size-i); + Map >(mat+offset, UpLo==Lower ? size-i : (i+1)) += + conj(alpha) * conj(u[i]) * OtherMap(v+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)) + + alpha * conj(v[i]) * OtherMap(u+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)); //FIXME This should be handled outside. - mat[offset] = real(mat[offset]); - offset += size-i; + mat[offset+(UpLo==Lower ? 0 : i)] = real(mat[offset+(UpLo==Lower ? 0 : i)]); + offset += UpLo==Lower ? size-i : (i+1); } } }; -- cgit v1.2.3 From 1b8f4164082f82265f8118d89de71bb14d7f0eb6 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 8 Sep 2012 22:51:40 +0800 Subject: Implement rank-1 update for self-adjoint packed matrices. --- blas/CMakeLists.txt | 4 ++-- blas/SelfadjointPackedProduct.h | 47 +++++++++++++++++++++++++++++++++++++++++ blas/common.h | 1 + blas/level2_real_impl.h | 47 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 blas/SelfadjointPackedProduct.h (limited to 'blas') diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index e46fde4d4..7ee6f0591 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -18,9 +18,9 @@ if(EIGEN_Fortran_COMPILER_WORKS) set(EigenBlas_SRCS ${EigenBlas_SRCS} complexdots.f srotm.f srotmg.f drotm.f drotmg.f - lsame.f dspmv.f dtpsv.f ssbmv.f sspr.f stpmv.f + lsame.f dspmv.f dtpsv.f ssbmv.f stpmv.f chbmv.f chpr.f ctpmv.f sspmv.f stpsv.f - zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dspr.f dtpmv.f + zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dtpmv.f zhpmv.f ztpsv.f dtbmv.f stbmv.f ctbmv.f ztbmv.f ) diff --git a/blas/SelfadjointPackedProduct.h b/blas/SelfadjointPackedProduct.h new file mode 100644 index 000000000..4ea36b569 --- /dev/null +++ b/blas/SelfadjointPackedProduct.h @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELFADJOINT_PACKED_PRODUCT_H +#define EIGEN_SELFADJOINT_PACKED_PRODUCT_H + +namespace internal { + +/* Optimized matrix += alpha * uv' + * The matrix is in packed form. + * + * FIXME I always fail tests for complex self-adjoint matrices. + * + ******* FATAL ERROR - PARAMETER NUMBER 6 WAS CHANGED INCORRECTLY ******* + ******* xHPR FAILED ON CALL NUMBER: + 2: xHPR ('U', 1, 0.0, X, 1, AP) + */ +template +struct selfadjoint_packed_rank1_update +{ + static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) + { + typedef Map > OtherMap; + Index offset = 0; + + for (Index i=0; i >(mat+offset, UpLo==Lower ? size-i : (i+1)) + += alpha * conj(vec[i]) * OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)); + //FIXME This should be handled outside. + mat[offset+(UpLo==Lower ? 0 : i)] = real(mat[offset+(UpLo==Lower ? 0 : i)]); + offset += UpLo==Lower ? size-i : (i+1); + } + } +}; + +//TODO struct selfadjoint_packed_product_selector + +} // end namespace internal + +#endif // EIGEN_SELFADJOINT_PACKED_PRODUCT_H diff --git a/blas/common.h b/blas/common.h index 26b4ed5a3..a14d32289 100644 --- a/blas/common.h +++ b/blas/common.h @@ -76,6 +76,7 @@ namespace Eigen { #include "BandTriangularSolver.h" #include "GeneralRank1Update.h" #include "Rank2Update.h" +#include "SelfadjointPackedProduct.h" } using namespace Eigen; diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index ca4469d7a..735545e2b 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -231,10 +231,49 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px * where alpha is a real scalar, x is an n element vector and A is an * n by n symmetric matrix, supplied in packed form. */ -// int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *alpha, Scalar *x, int *incx, Scalar *ap) -// { -// return 1; -// } +int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap) +{ + typedef void (*functype)(int, Scalar*, const Scalar*, Scalar); + static functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + + func[UP] = (internal::selfadjoint_packed_rank1_update::run); + func[LO] = (internal::selfadjoint_packed_rank1_update::run); + + init = true; + } + + Scalar* x = reinterpret_cast(px); + Scalar* ap = reinterpret_cast(pap); + Scalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + if(info) + return xerbla_(SCALAR_SUFFIX_UP"SPR ",&info,6); + + if(alpha==Scalar(0)) + return 1; + + Scalar* x_cpy = get_compact_vector(x, *n, *incx); + + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; + + func[code](*n, ap, x_cpy, alpha); + + if(x_cpy!=x) delete[] x_cpy; + + return 1; +} /** DSPR2 performs the symmetric rank 2 operation * -- cgit v1.2.3 From 669db3d7768b3b94d31d6552a1012ee29f54b8d8 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sun, 9 Sep 2012 02:55:10 +0800 Subject: Extend rank-1 updates for different storage orders. --- blas/GeneralRank1Update.h | 21 ++++++++++++--- blas/PackedSelfadjointProduct.h | 59 +++++++++++++++++++++++++++++++++++++++++ blas/SelfadjointPackedProduct.h | 47 -------------------------------- blas/common.h | 2 +- blas/level2_cplx_impl.h | 4 +-- blas/level2_real_impl.h | 6 ++--- 6 files changed, 82 insertions(+), 57 deletions(-) create mode 100644 blas/PackedSelfadjointProduct.h delete mode 100644 blas/SelfadjointPackedProduct.h (limited to 'blas') diff --git a/blas/GeneralRank1Update.h b/blas/GeneralRank1Update.h index a3301ed92..6d33fbcc1 100644 --- a/blas/GeneralRank1Update.h +++ b/blas/GeneralRank1Update.h @@ -13,15 +13,28 @@ namespace internal { /* Optimized matrix += alpha * uv' */ -template -struct general_rank1_update +template +struct general_rank1_update; + +template +struct general_rank1_update { static void run(Index rows, Index cols, Scalar* mat, Index stride, const Scalar* u, const Scalar* v, Scalar alpha) { - typedef Matrix PlainVector; internal::conj_if cj; + typedef Map > OtherMap; + typedef typename internal::conditional::type ConjRhsType; for (Index i=0; i(mat+stride*i,rows) += alpha * cj(v[i]) * Map(u,rows); + Map >(mat+stride*i,rows) += alpha * cj(v[i]) * ConjRhsType(OtherMap(u,rows)); + } +}; + +template +struct general_rank1_update +{ + static void run(Index rows, Index cols, Scalar* mat, Index stride, const Scalar* u, const Scalar* v, Scalar alpha) + { + general_rank1_update::run(rows,cols,mat,stride,u,v,alpha); } }; diff --git a/blas/PackedSelfadjointProduct.h b/blas/PackedSelfadjointProduct.h new file mode 100644 index 000000000..adc86ece1 --- /dev/null +++ b/blas/PackedSelfadjointProduct.h @@ -0,0 +1,59 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELFADJOINT_PACKED_PRODUCT_H +#define EIGEN_SELFADJOINT_PACKED_PRODUCT_H + +namespace internal { + +/* Optimized matrix += alpha * uv' + * The matrix is in packed form. + * + * FIXME I always fail tests for complex self-adjoint matrices. + * + ******* FATAL ERROR - PARAMETER NUMBER 6 WAS CHANGED INCORRECTLY ******* + ******* xHPR FAILED ON CALL NUMBER: + 2: xHPR ('U', 1, 0.0, X, 1, AP) + */ +template +struct selfadjoint_packed_rank1_update; + +template +struct selfadjoint_packed_rank1_update +{ + static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) + { + internal::conj_if cj; + typedef Map > OtherMap; + typedef typename internal::conditional::type ConjRhsType; + Index offset = 0; + + for (Index i=0; i >(mat+offset, UpLo==Lower ? size-i : (i+1)) + += alpha * cj(vec[i]) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1))); + //FIXME This should be handled outside. + mat[offset+(UpLo==Lower ? 0 : i)] = real(mat[offset+(UpLo==Lower ? 0 : i)]); + offset += UpLo==Lower ? size-i : (i+1); + } + } +}; + +template +struct selfadjoint_packed_rank1_update +{ + static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) + { + selfadjoint_packed_rank1_update::run(size,mat,vec,alpha); + } +}; + +} // end namespace internal + +#endif // EIGEN_SELFADJOINT_PACKED_PRODUCT_H diff --git a/blas/SelfadjointPackedProduct.h b/blas/SelfadjointPackedProduct.h deleted file mode 100644 index 4ea36b569..000000000 --- a/blas/SelfadjointPackedProduct.h +++ /dev/null @@ -1,47 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Chen-Pang He -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SELFADJOINT_PACKED_PRODUCT_H -#define EIGEN_SELFADJOINT_PACKED_PRODUCT_H - -namespace internal { - -/* Optimized matrix += alpha * uv' - * The matrix is in packed form. - * - * FIXME I always fail tests for complex self-adjoint matrices. - * - ******* FATAL ERROR - PARAMETER NUMBER 6 WAS CHANGED INCORRECTLY ******* - ******* xHPR FAILED ON CALL NUMBER: - 2: xHPR ('U', 1, 0.0, X, 1, AP) - */ -template -struct selfadjoint_packed_rank1_update -{ - static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) - { - typedef Map > OtherMap; - Index offset = 0; - - for (Index i=0; i >(mat+offset, UpLo==Lower ? size-i : (i+1)) - += alpha * conj(vec[i]) * OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)); - //FIXME This should be handled outside. - mat[offset+(UpLo==Lower ? 0 : i)] = real(mat[offset+(UpLo==Lower ? 0 : i)]); - offset += UpLo==Lower ? size-i : (i+1); - } - } -}; - -//TODO struct selfadjoint_packed_product_selector - -} // end namespace internal - -#endif // EIGEN_SELFADJOINT_PACKED_PRODUCT_H diff --git a/blas/common.h b/blas/common.h index a14d32289..3160d3b41 100644 --- a/blas/common.h +++ b/blas/common.h @@ -75,8 +75,8 @@ inline bool check_uplo(const char* uplo) namespace Eigen { #include "BandTriangularSolver.h" #include "GeneralRank1Update.h" +#include "PackedSelfadjointProduct.h" #include "Rank2Update.h" -#include "SelfadjointPackedProduct.h" } using namespace Eigen; diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 46bddc134..11ee13b4c 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -309,7 +309,7 @@ int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, in Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; @@ -346,7 +346,7 @@ int EIGEN_BLAS_FUNC(gerc)(int *m, int *n, RealScalar *palpha, RealScalar *px, in Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index 735545e2b..38b0dadb6 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -242,8 +242,8 @@ int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *in for(int k=0; k<2; ++k) func[k] = 0; - func[UP] = (internal::selfadjoint_packed_rank1_update::run); - func[LO] = (internal::selfadjoint_packed_rank1_update::run); + func[UP] = (internal::selfadjoint_packed_rank1_update::run); + func[LO] = (internal::selfadjoint_packed_rank1_update::run); init = true; } @@ -359,7 +359,7 @@ int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; -- cgit v1.2.3 From 2828c995c55891b0769b6a4cf3a9e99fe53b01d3 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sun, 9 Sep 2012 21:35:28 +0800 Subject: Use conj_expr_if to clarify what it's doing. --- blas/GeneralRank1Update.h | 5 +++-- blas/PackedSelfadjointProduct.h | 10 +++++----- blas/level2_real_impl.h | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'blas') diff --git a/blas/GeneralRank1Update.h b/blas/GeneralRank1Update.h index 6d33fbcc1..07d388c88 100644 --- a/blas/GeneralRank1Update.h +++ b/blas/GeneralRank1Update.h @@ -21,9 +21,10 @@ struct general_rank1_update { static void run(Index rows, Index cols, Scalar* mat, Index stride, const Scalar* u, const Scalar* v, Scalar alpha) { - internal::conj_if cj; typedef Map > OtherMap; - typedef typename internal::conditional::type ConjRhsType; + typedef typename conj_expr_if::type ConjRhsType; + conj_if cj; + for (Index i=0; i >(mat+stride*i,rows) += alpha * cj(v[i]) * ConjRhsType(OtherMap(u,rows)); } diff --git a/blas/PackedSelfadjointProduct.h b/blas/PackedSelfadjointProduct.h index adc86ece1..1ba67b9c1 100644 --- a/blas/PackedSelfadjointProduct.h +++ b/blas/PackedSelfadjointProduct.h @@ -17,9 +17,9 @@ namespace internal { * * FIXME I always fail tests for complex self-adjoint matrices. * - ******* FATAL ERROR - PARAMETER NUMBER 6 WAS CHANGED INCORRECTLY ******* - ******* xHPR FAILED ON CALL NUMBER: - 2: xHPR ('U', 1, 0.0, X, 1, AP) + * ******* FATAL ERROR - PARAMETER NUMBER 6 WAS CHANGED INCORRECTLY ******* + * ******* xHPR FAILED ON CALL NUMBER: + * 2: xHPR ('U', 1, 0.0, X, 1, AP) */ template struct selfadjoint_packed_rank1_update; @@ -29,9 +29,9 @@ struct selfadjoint_packed_rank1_update cj; typedef Map > OtherMap; - typedef typename internal::conditional::type ConjRhsType; + typedef typename conj_expr_if::type ConjRhsType; + conj_if cj; Index offset = 0; for (Index i=0; i::run); - func[LO] = (internal::selfadjoint_packed_rank1_update::run); + func[UP] = (internal::selfadjoint_packed_rank1_update::run); + func[LO] = (internal::selfadjoint_packed_rank1_update::run); init = true; } -- cgit v1.2.3 From 3642ca4d465f347188e155aab4464b6e814855cb Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sun, 9 Sep 2012 23:34:45 +0800 Subject: Implement packed triangular matrix-vector product. --- blas/CMakeLists.txt | 6 +- blas/PackedTriangularMatrixVector.h | 79 +++++++++ blas/common.h | 1 + blas/ctpmv.f | 329 ------------------------------------ blas/dtpmv.f | 293 -------------------------------- blas/level2_impl.h | 66 +++++++- blas/stpmv.f | 293 -------------------------------- blas/ztpmv.f | 329 ------------------------------------ 8 files changed, 144 insertions(+), 1252 deletions(-) create mode 100644 blas/PackedTriangularMatrixVector.h delete mode 100644 blas/ctpmv.f delete mode 100644 blas/dtpmv.f delete mode 100644 blas/stpmv.f delete mode 100644 blas/ztpmv.f (limited to 'blas') diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index 7ee6f0591..171b75aa1 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -18,9 +18,9 @@ if(EIGEN_Fortran_COMPILER_WORKS) set(EigenBlas_SRCS ${EigenBlas_SRCS} complexdots.f srotm.f srotmg.f drotm.f drotmg.f - lsame.f dspmv.f dtpsv.f ssbmv.f stpmv.f - chbmv.f chpr.f ctpmv.f sspmv.f stpsv.f - zhbmv.f zhpr.f ztpmv.f chpmv.f ctpsv.f dsbmv.f dtpmv.f + lsame.f dspmv.f dtpsv.f ssbmv.f + chbmv.f chpr.f sspmv.f stpsv.f + zhbmv.f zhpr.f chpmv.f ctpsv.f dsbmv.f zhpmv.f ztpsv.f dtbmv.f stbmv.f ctbmv.f ztbmv.f ) diff --git a/blas/PackedTriangularMatrixVector.h b/blas/PackedTriangularMatrixVector.h new file mode 100644 index 000000000..e9886d56f --- /dev/null +++ b/blas/PackedTriangularMatrixVector.h @@ -0,0 +1,79 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKED_TRIANGULAR_MATRIX_VECTOR_H +#define EIGEN_PACKED_TRIANGULAR_MATRIX_VECTOR_H + +namespace internal { + +template +struct packed_triangular_matrix_vector_product; + +template +struct packed_triangular_matrix_vector_product +{ + typedef typename scalar_product_traits::ReturnType ResScalar; + enum { + IsLower = (Mode & Lower) ==Lower, + HasUnitDiag = (Mode & UnitDiag)==UnitDiag, + HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag + }; + static void run(Index size, const LhsScalar* lhs, const RhsScalar* rhs, ResScalar* res, ResScalar alpha) + { + internal::conj_if cj; + typedef Map > LhsMap; + typedef typename conj_expr_if::type ConjLhsType; + typedef Map > ResMap; + + for (Index i=0; i0)) + ResMap(res+(IsLower ? s+i : 0),r) += alpha * cj(rhs[i]) * ConjLhsType(LhsMap(lhs+s,r)); + if (HasUnitDiag) + res[i] += alpha * cj(rhs[i]); + lhs += IsLower ? size-i: i+1; + } + }; +}; + +template +struct packed_triangular_matrix_vector_product +{ + typedef typename scalar_product_traits::ReturnType ResScalar; + enum { + IsLower = (Mode & Lower) ==Lower, + HasUnitDiag = (Mode & UnitDiag)==UnitDiag, + HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag + }; + static void run(Index size, const LhsScalar* lhs, const RhsScalar* rhs, ResScalar* res, ResScalar alpha) + { + internal::conj_if cj; + typedef Map > LhsMap; + typedef typename conj_expr_if::type ConjLhsType; + typedef Map > RhsMap; + typedef typename conj_expr_if::type ConjRhsType; + + for (Index i=0; i0)) + res[i] += alpha * (ConjLhsType(LhsMap(lhs+s,r)).cwiseProduct(ConjRhsType(RhsMap(rhs+(IsLower ? 0 : s+i),r)))).sum(); + if (HasUnitDiag) + res[i] += alpha * cj(rhs[i]); + lhs += IsLower ? i+1 : size-i; + } + }; +}; + +} // end namespace internal + +#endif // EIGEN_PACKED_TRIANGULAR_MATRIX_VECTOR_H diff --git a/blas/common.h b/blas/common.h index 3160d3b41..1019d8623 100644 --- a/blas/common.h +++ b/blas/common.h @@ -76,6 +76,7 @@ namespace Eigen { #include "BandTriangularSolver.h" #include "GeneralRank1Update.h" #include "PackedSelfadjointProduct.h" +#include "PackedTriangularMatrixVector.h" #include "Rank2Update.h" } diff --git a/blas/ctpmv.f b/blas/ctpmv.f deleted file mode 100644 index b63742ccb..000000000 --- a/blas/ctpmv.f +++ /dev/null @@ -1,329 +0,0 @@ - SUBROUTINE CTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - COMPLEX AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* CTPMV performs one of the matrix-vector operations -* -* x := A*x, or x := A'*x, or x := conjg( A' )*x, -* -* where x is an n element vector and A is an n by n unit, or non-unit, -* upper or lower triangular matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the operation to be performed as -* follows: -* -* TRANS = 'N' or 'n' x := A*x. -* -* TRANS = 'T' or 't' x := A'*x. -* -* TRANS = 'C' or 'c' x := conjg( A' )*x. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* AP - COMPLEX array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) -* respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) -* respectively, and so on. -* Note that when DIAG = 'U' or 'u', the diagonal elements of -* A are not referenced, but are assumed to be unity. -* Unchanged on exit. -* -* X - COMPLEX array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. On exit, X is overwritten with the -* tranformed vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - COMPLEX ZERO - PARAMETER (ZERO= (0.0E+0,0.0E+0)) -* .. -* .. Local Scalars .. - COMPLEX TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX - LOGICAL NOCONJ,NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC CONJG -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (INCX.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('CTPMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOCONJ = LSAME(TRANS,'T') - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of AP are -* accessed sequentially with one pass through AP. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x:= A*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = 1 - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - K = KK - DO 10 I = 1,J - 1 - X(I) = X(I) + TEMP*AP(K) - K = K + 1 - 10 CONTINUE - IF (NOUNIT) X(J) = X(J)*AP(KK+J-1) - END IF - KK = KK + J - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - DO 30 K = KK,KK + J - 2 - X(IX) = X(IX) + TEMP*AP(K) - IX = IX + INCX - 30 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1) - END IF - JX = JX + INCX - KK = KK + J - 40 CONTINUE - END IF - ELSE - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 60 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - K = KK - DO 50 I = N,J + 1,-1 - X(I) = X(I) + TEMP*AP(K) - K = K - 1 - 50 CONTINUE - IF (NOUNIT) X(J) = X(J)*AP(KK-N+J) - END IF - KK = KK - (N-J+1) - 60 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 80 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - DO 70 K = KK,KK - (N- (J+1)),-1 - X(IX) = X(IX) + TEMP*AP(K) - IX = IX - INCX - 70 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J) - END IF - JX = JX - INCX - KK = KK - (N-J+1) - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := A'*x or x := conjg( A' )*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 110 J = N,1,-1 - TEMP = X(J) - K = KK - 1 - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 90 I = J - 1,1,-1 - TEMP = TEMP + AP(K)*X(I) - K = K - 1 - 90 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK)) - DO 100 I = J - 1,1,-1 - TEMP = TEMP + CONJG(AP(K))*X(I) - K = K - 1 - 100 CONTINUE - END IF - X(J) = TEMP - KK = KK - J - 110 CONTINUE - ELSE - JX = KX + (N-1)*INCX - DO 140 J = N,1,-1 - TEMP = X(JX) - IX = JX - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 120 K = KK - 1,KK - J + 1,-1 - IX = IX - INCX - TEMP = TEMP + AP(K)*X(IX) - 120 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK)) - DO 130 K = KK - 1,KK - J + 1,-1 - IX = IX - INCX - TEMP = TEMP + CONJG(AP(K))*X(IX) - 130 CONTINUE - END IF - X(JX) = TEMP - JX = JX - INCX - KK = KK - J - 140 CONTINUE - END IF - ELSE - KK = 1 - IF (INCX.EQ.1) THEN - DO 170 J = 1,N - TEMP = X(J) - K = KK + 1 - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 150 I = J + 1,N - TEMP = TEMP + AP(K)*X(I) - K = K + 1 - 150 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK)) - DO 160 I = J + 1,N - TEMP = TEMP + CONJG(AP(K))*X(I) - K = K + 1 - 160 CONTINUE - END IF - X(J) = TEMP - KK = KK + (N-J+1) - 170 CONTINUE - ELSE - JX = KX - DO 200 J = 1,N - TEMP = X(JX) - IX = JX - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 180 K = KK + 1,KK + N - J - IX = IX + INCX - TEMP = TEMP + AP(K)*X(IX) - 180 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK)) - DO 190 K = KK + 1,KK + N - J - IX = IX + INCX - TEMP = TEMP + CONJG(AP(K))*X(IX) - 190 CONTINUE - END IF - X(JX) = TEMP - JX = JX + INCX - KK = KK + (N-J+1) - 200 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of CTPMV . -* - END diff --git a/blas/dtpmv.f b/blas/dtpmv.f deleted file mode 100644 index c5bc112dc..000000000 --- a/blas/dtpmv.f +++ /dev/null @@ -1,293 +0,0 @@ - SUBROUTINE DTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - DOUBLE PRECISION AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* DTPMV performs one of the matrix-vector operations -* -* x := A*x, or x := A'*x, -* -* where x is an n element vector and A is an n by n unit, or non-unit, -* upper or lower triangular matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the operation to be performed as -* follows: -* -* TRANS = 'N' or 'n' x := A*x. -* -* TRANS = 'T' or 't' x := A'*x. -* -* TRANS = 'C' or 'c' x := A'*x. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* AP - DOUBLE PRECISION array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) -* respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) -* respectively, and so on. -* Note that when DIAG = 'U' or 'u', the diagonal elements of -* A are not referenced, but are assumed to be unity. -* Unchanged on exit. -* -* X - DOUBLE PRECISION array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. On exit, X is overwritten with the -* tranformed vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE PRECISION ZERO - PARAMETER (ZERO=0.0D+0) -* .. -* .. Local Scalars .. - DOUBLE PRECISION TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX - LOGICAL NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (INCX.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('DTPMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of AP are -* accessed sequentially with one pass through AP. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x:= A*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = 1 - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - K = KK - DO 10 I = 1,J - 1 - X(I) = X(I) + TEMP*AP(K) - K = K + 1 - 10 CONTINUE - IF (NOUNIT) X(J) = X(J)*AP(KK+J-1) - END IF - KK = KK + J - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - DO 30 K = KK,KK + J - 2 - X(IX) = X(IX) + TEMP*AP(K) - IX = IX + INCX - 30 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1) - END IF - JX = JX + INCX - KK = KK + J - 40 CONTINUE - END IF - ELSE - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 60 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - K = KK - DO 50 I = N,J + 1,-1 - X(I) = X(I) + TEMP*AP(K) - K = K - 1 - 50 CONTINUE - IF (NOUNIT) X(J) = X(J)*AP(KK-N+J) - END IF - KK = KK - (N-J+1) - 60 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 80 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - DO 70 K = KK,KK - (N- (J+1)),-1 - X(IX) = X(IX) + TEMP*AP(K) - IX = IX - INCX - 70 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J) - END IF - JX = JX - INCX - KK = KK - (N-J+1) - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := A'*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 100 J = N,1,-1 - TEMP = X(J) - IF (NOUNIT) TEMP = TEMP*AP(KK) - K = KK - 1 - DO 90 I = J - 1,1,-1 - TEMP = TEMP + AP(K)*X(I) - K = K - 1 - 90 CONTINUE - X(J) = TEMP - KK = KK - J - 100 CONTINUE - ELSE - JX = KX + (N-1)*INCX - DO 120 J = N,1,-1 - TEMP = X(JX) - IX = JX - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 110 K = KK - 1,KK - J + 1,-1 - IX = IX - INCX - TEMP = TEMP + AP(K)*X(IX) - 110 CONTINUE - X(JX) = TEMP - JX = JX - INCX - KK = KK - J - 120 CONTINUE - END IF - ELSE - KK = 1 - IF (INCX.EQ.1) THEN - DO 140 J = 1,N - TEMP = X(J) - IF (NOUNIT) TEMP = TEMP*AP(KK) - K = KK + 1 - DO 130 I = J + 1,N - TEMP = TEMP + AP(K)*X(I) - K = K + 1 - 130 CONTINUE - X(J) = TEMP - KK = KK + (N-J+1) - 140 CONTINUE - ELSE - JX = KX - DO 160 J = 1,N - TEMP = X(JX) - IX = JX - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 150 K = KK + 1,KK + N - J - IX = IX + INCX - TEMP = TEMP + AP(K)*X(IX) - 150 CONTINUE - X(JX) = TEMP - JX = JX + INCX - KK = KK + (N-J+1) - 160 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of DTPMV . -* - END diff --git a/blas/level2_impl.h b/blas/level2_impl.h index f1f7371ee..997ad016f 100644 --- a/blas/level2_impl.h +++ b/blas/level2_impl.h @@ -187,7 +187,7 @@ int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar copy_back(res.data(),b,*n,*incb); if(actual_b!=b) delete[] actual_b; - return 0; + return 1; } /** GBMV performs one of the matrix-vector operations @@ -399,10 +399,66 @@ int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, Real * where x is an n element vector and A is an n by n unit, or non-unit, * upper or lower triangular matrix, supplied in packed form. */ -// int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *trans, char *diag, int *n, RealScalar *ap, RealScalar *x, int *incx) -// { -// return 1; -// } +int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) +{ + typedef void (*functype)(int, const Scalar*, const Scalar*, Scalar*, Scalar); + static functype func[16]; + + static bool init = false; + if(!init) + { + for(int k=0; k<16; ++k) + func[k] = 0; + + func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + + func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + + func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + + func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); + + init = true; + } + + Scalar* ap = reinterpret_cast(pap); + Scalar* x = reinterpret_cast(px); + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(OP(*opa)==INVALID) info = 2; + else if(DIAG(*diag)==INVALID) info = 3; + else if(*n<0) info = 4; + else if(*incx==0) info = 7; + if(info) + return xerbla_(SCALAR_SUFFIX_UP"TPMV ",&info,6); + + if(*n==0) + return 1; + + Scalar* actual_x = get_compact_vector(x,*n,*incx); + Matrix res(*n); + res.setZero(); + + int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3); + if(code>=16 || func[code]==0) + return 0; + + func[code](*n, ap, actual_x, res.data(), Scalar(1)); + + copy_back(res.data(),x,*n,*incx); + if(actual_x!=x) delete[] actual_x; + + return 1; +} /** DTPSV solves one of the systems of equations * diff --git a/blas/stpmv.f b/blas/stpmv.f deleted file mode 100644 index 71ea49a36..000000000 --- a/blas/stpmv.f +++ /dev/null @@ -1,293 +0,0 @@ - SUBROUTINE STPMV(UPLO,TRANS,DIAG,N,AP,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - REAL AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* STPMV performs one of the matrix-vector operations -* -* x := A*x, or x := A'*x, -* -* where x is an n element vector and A is an n by n unit, or non-unit, -* upper or lower triangular matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the operation to be performed as -* follows: -* -* TRANS = 'N' or 'n' x := A*x. -* -* TRANS = 'T' or 't' x := A'*x. -* -* TRANS = 'C' or 'c' x := A'*x. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* AP - REAL array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) -* respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) -* respectively, and so on. -* Note that when DIAG = 'U' or 'u', the diagonal elements of -* A are not referenced, but are assumed to be unity. -* Unchanged on exit. -* -* X - REAL array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. On exit, X is overwritten with the -* tranformed vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - REAL ZERO - PARAMETER (ZERO=0.0E+0) -* .. -* .. Local Scalars .. - REAL TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX - LOGICAL NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (INCX.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('STPMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of AP are -* accessed sequentially with one pass through AP. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x:= A*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = 1 - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - K = KK - DO 10 I = 1,J - 1 - X(I) = X(I) + TEMP*AP(K) - K = K + 1 - 10 CONTINUE - IF (NOUNIT) X(J) = X(J)*AP(KK+J-1) - END IF - KK = KK + J - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - DO 30 K = KK,KK + J - 2 - X(IX) = X(IX) + TEMP*AP(K) - IX = IX + INCX - 30 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1) - END IF - JX = JX + INCX - KK = KK + J - 40 CONTINUE - END IF - ELSE - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 60 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - K = KK - DO 50 I = N,J + 1,-1 - X(I) = X(I) + TEMP*AP(K) - K = K - 1 - 50 CONTINUE - IF (NOUNIT) X(J) = X(J)*AP(KK-N+J) - END IF - KK = KK - (N-J+1) - 60 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 80 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - DO 70 K = KK,KK - (N- (J+1)),-1 - X(IX) = X(IX) + TEMP*AP(K) - IX = IX - INCX - 70 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J) - END IF - JX = JX - INCX - KK = KK - (N-J+1) - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := A'*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 100 J = N,1,-1 - TEMP = X(J) - IF (NOUNIT) TEMP = TEMP*AP(KK) - K = KK - 1 - DO 90 I = J - 1,1,-1 - TEMP = TEMP + AP(K)*X(I) - K = K - 1 - 90 CONTINUE - X(J) = TEMP - KK = KK - J - 100 CONTINUE - ELSE - JX = KX + (N-1)*INCX - DO 120 J = N,1,-1 - TEMP = X(JX) - IX = JX - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 110 K = KK - 1,KK - J + 1,-1 - IX = IX - INCX - TEMP = TEMP + AP(K)*X(IX) - 110 CONTINUE - X(JX) = TEMP - JX = JX - INCX - KK = KK - J - 120 CONTINUE - END IF - ELSE - KK = 1 - IF (INCX.EQ.1) THEN - DO 140 J = 1,N - TEMP = X(J) - IF (NOUNIT) TEMP = TEMP*AP(KK) - K = KK + 1 - DO 130 I = J + 1,N - TEMP = TEMP + AP(K)*X(I) - K = K + 1 - 130 CONTINUE - X(J) = TEMP - KK = KK + (N-J+1) - 140 CONTINUE - ELSE - JX = KX - DO 160 J = 1,N - TEMP = X(JX) - IX = JX - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 150 K = KK + 1,KK + N - J - IX = IX + INCX - TEMP = TEMP + AP(K)*X(IX) - 150 CONTINUE - X(JX) = TEMP - JX = JX + INCX - KK = KK + (N-J+1) - 160 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of STPMV . -* - END diff --git a/blas/ztpmv.f b/blas/ztpmv.f deleted file mode 100644 index 5a7b3b8b7..000000000 --- a/blas/ztpmv.f +++ /dev/null @@ -1,329 +0,0 @@ - SUBROUTINE ZTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - DOUBLE COMPLEX AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* ZTPMV performs one of the matrix-vector operations -* -* x := A*x, or x := A'*x, or x := conjg( A' )*x, -* -* where x is an n element vector and A is an n by n unit, or non-unit, -* upper or lower triangular matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the operation to be performed as -* follows: -* -* TRANS = 'N' or 'n' x := A*x. -* -* TRANS = 'T' or 't' x := A'*x. -* -* TRANS = 'C' or 'c' x := conjg( A' )*x. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* AP - COMPLEX*16 array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) -* respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) -* respectively, and so on. -* Note that when DIAG = 'U' or 'u', the diagonal elements of -* A are not referenced, but are assumed to be unity. -* Unchanged on exit. -* -* X - COMPLEX*16 array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. On exit, X is overwritten with the -* tranformed vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE COMPLEX ZERO - PARAMETER (ZERO= (0.0D+0,0.0D+0)) -* .. -* .. Local Scalars .. - DOUBLE COMPLEX TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX - LOGICAL NOCONJ,NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC DCONJG -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (INCX.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('ZTPMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOCONJ = LSAME(TRANS,'T') - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of AP are -* accessed sequentially with one pass through AP. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x:= A*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = 1 - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - K = KK - DO 10 I = 1,J - 1 - X(I) = X(I) + TEMP*AP(K) - K = K + 1 - 10 CONTINUE - IF (NOUNIT) X(J) = X(J)*AP(KK+J-1) - END IF - KK = KK + J - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - DO 30 K = KK,KK + J - 2 - X(IX) = X(IX) + TEMP*AP(K) - IX = IX + INCX - 30 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1) - END IF - JX = JX + INCX - KK = KK + J - 40 CONTINUE - END IF - ELSE - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 60 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - K = KK - DO 50 I = N,J + 1,-1 - X(I) = X(I) + TEMP*AP(K) - K = K - 1 - 50 CONTINUE - IF (NOUNIT) X(J) = X(J)*AP(KK-N+J) - END IF - KK = KK - (N-J+1) - 60 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 80 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - DO 70 K = KK,KK - (N- (J+1)),-1 - X(IX) = X(IX) + TEMP*AP(K) - IX = IX - INCX - 70 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J) - END IF - JX = JX - INCX - KK = KK - (N-J+1) - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := A'*x or x := conjg( A' )*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 110 J = N,1,-1 - TEMP = X(J) - K = KK - 1 - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 90 I = J - 1,1,-1 - TEMP = TEMP + AP(K)*X(I) - K = K - 1 - 90 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK)) - DO 100 I = J - 1,1,-1 - TEMP = TEMP + DCONJG(AP(K))*X(I) - K = K - 1 - 100 CONTINUE - END IF - X(J) = TEMP - KK = KK - J - 110 CONTINUE - ELSE - JX = KX + (N-1)*INCX - DO 140 J = N,1,-1 - TEMP = X(JX) - IX = JX - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 120 K = KK - 1,KK - J + 1,-1 - IX = IX - INCX - TEMP = TEMP + AP(K)*X(IX) - 120 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK)) - DO 130 K = KK - 1,KK - J + 1,-1 - IX = IX - INCX - TEMP = TEMP + DCONJG(AP(K))*X(IX) - 130 CONTINUE - END IF - X(JX) = TEMP - JX = JX - INCX - KK = KK - J - 140 CONTINUE - END IF - ELSE - KK = 1 - IF (INCX.EQ.1) THEN - DO 170 J = 1,N - TEMP = X(J) - K = KK + 1 - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 150 I = J + 1,N - TEMP = TEMP + AP(K)*X(I) - K = K + 1 - 150 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK)) - DO 160 I = J + 1,N - TEMP = TEMP + DCONJG(AP(K))*X(I) - K = K + 1 - 160 CONTINUE - END IF - X(J) = TEMP - KK = KK + (N-J+1) - 170 CONTINUE - ELSE - JX = KX - DO 200 J = 1,N - TEMP = X(JX) - IX = JX - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*AP(KK) - DO 180 K = KK + 1,KK + N - J - IX = IX + INCX - TEMP = TEMP + AP(K)*X(IX) - 180 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK)) - DO 190 K = KK + 1,KK + N - J - IX = IX + INCX - TEMP = TEMP + DCONJG(AP(K))*X(IX) - 190 CONTINUE - END IF - X(JX) = TEMP - JX = JX + INCX - KK = KK + (N-J+1) - 200 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of ZTPMV . -* - END -- cgit v1.2.3 From 65caa40a3d792dbbd28b3f47de2a87efea58bb24 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Mon, 10 Sep 2012 06:29:02 +0800 Subject: Implement packed triangular solver. --- blas/CMakeLists.txt | 8 +- blas/PackedTriangularSolverVector.h | 88 ++++++++++ blas/common.h | 1 + blas/ctpsv.f | 332 ------------------------------------ blas/dtpsv.f | 296 -------------------------------- blas/level2_impl.h | 55 +++++- blas/stpsv.f | 296 -------------------------------- blas/ztpsv.f | 332 ------------------------------------ 8 files changed, 144 insertions(+), 1264 deletions(-) create mode 100644 blas/PackedTriangularSolverVector.h delete mode 100644 blas/ctpsv.f delete mode 100644 blas/dtpsv.f delete mode 100644 blas/stpsv.f delete mode 100644 blas/ztpsv.f (limited to 'blas') diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index 171b75aa1..3877e1285 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -18,10 +18,10 @@ if(EIGEN_Fortran_COMPILER_WORKS) set(EigenBlas_SRCS ${EigenBlas_SRCS} complexdots.f srotm.f srotmg.f drotm.f drotmg.f - lsame.f dspmv.f dtpsv.f ssbmv.f - chbmv.f chpr.f sspmv.f stpsv.f - zhbmv.f zhpr.f chpmv.f ctpsv.f dsbmv.f - zhpmv.f ztpsv.f + lsame.f dspmv.f ssbmv.f + chbmv.f chpr.f sspmv.f + zhbmv.f zhpr.f chpmv.f dsbmv.f + zhpmv.f dtbmv.f stbmv.f ctbmv.f ztbmv.f ) else() diff --git a/blas/PackedTriangularSolverVector.h b/blas/PackedTriangularSolverVector.h new file mode 100644 index 000000000..5c0bb4bd6 --- /dev/null +++ b/blas/PackedTriangularSolverVector.h @@ -0,0 +1,88 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKED_TRIANGULAR_SOLVER_VECTOR_H +#define EIGEN_PACKED_TRIANGULAR_SOLVER_VECTOR_H + +namespace internal { + +template +struct packed_triangular_solve_vector; + +// forward and backward substitution, row-major, rhs is a vector +template +struct packed_triangular_solve_vector +{ + enum { + IsLower = (Mode&Lower)==Lower + }; + static void run(Index size, const LhsScalar* lhs, RhsScalar* rhs) + { + internal::conj_if cj; + typedef Map > LhsMap; + typedef typename conj_expr_if::type ConjLhsType; + + lhs += IsLower ? 0 : (size*(size+1)>>1)-1; + for(Index pi=0; pi0) + rhs[i] -= (ConjLhsType(LhsMap(lhs+s,pi)) + .cwiseProduct(Map >(rhs+(IsLower ? 0 : i+1),pi))).sum(); + if (!(Mode & UnitDiag)) + rhs[i] /= cj(lhs[IsLower ? i : 0]); + IsLower ? lhs += pi+1 : lhs -= pi+2; + } + } +}; + +// forward and backward substitution, column-major, rhs is a vector +template +struct packed_triangular_solve_vector +{ + enum { + IsLower = (Mode&Lower)==Lower + }; + static void run(Index size, const LhsScalar* lhs, RhsScalar* rhs) + { + internal::conj_if cj; + typedef Map > LhsMap; + typedef typename conj_expr_if::type ConjLhsType; + + lhs += IsLower ? 0 : size*(size-1)>>1; + for(Index pi=0; pi0) + Map >(rhs+(IsLower? i+1 : 0),r) -= + rhs[i] * ConjLhsType(LhsMap(lhs+(IsLower? 1 : 0),r)); + IsLower ? lhs += size-pi : lhs -= r; + } + } +}; + +template +struct packed_triangular_solve_vector +{ + static void run(Index size, const LhsScalar* lhs, RhsScalar* rhs) + { + packed_triangular_solve_vector::run(size, lhs, rhs); + } +}; + +} // end namespace internal + +#endif // EIGEN_PACKED_TRIANGULAR_SOLVER_VECTOR_H diff --git a/blas/common.h b/blas/common.h index 1019d8623..ee38b31d1 100644 --- a/blas/common.h +++ b/blas/common.h @@ -77,6 +77,7 @@ namespace Eigen { #include "GeneralRank1Update.h" #include "PackedSelfadjointProduct.h" #include "PackedTriangularMatrixVector.h" +#include "PackedTriangularSolverVector.h" #include "Rank2Update.h" } diff --git a/blas/ctpsv.f b/blas/ctpsv.f deleted file mode 100644 index 1804797ea..000000000 --- a/blas/ctpsv.f +++ /dev/null @@ -1,332 +0,0 @@ - SUBROUTINE CTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - COMPLEX AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* CTPSV solves one of the systems of equations -* -* A*x = b, or A'*x = b, or conjg( A' )*x = b, -* -* where b and x are n element vectors and A is an n by n unit, or -* non-unit, upper or lower triangular matrix, supplied in packed form. -* -* No test for singularity or near-singularity is included in this -* routine. Such tests must be performed before calling this routine. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the equations to be solved as -* follows: -* -* TRANS = 'N' or 'n' A*x = b. -* -* TRANS = 'T' or 't' A'*x = b. -* -* TRANS = 'C' or 'c' conjg( A' )*x = b. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* AP - COMPLEX array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) -* respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) -* respectively, and so on. -* Note that when DIAG = 'U' or 'u', the diagonal elements of -* A are not referenced, but are assumed to be unity. -* Unchanged on exit. -* -* X - COMPLEX array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element right-hand side vector b. On exit, X is overwritten -* with the solution vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - COMPLEX ZERO - PARAMETER (ZERO= (0.0E+0,0.0E+0)) -* .. -* .. Local Scalars .. - COMPLEX TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX - LOGICAL NOCONJ,NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC CONJG -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (INCX.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('CTPSV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOCONJ = LSAME(TRANS,'T') - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of AP are -* accessed sequentially with one pass through AP. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x := inv( A )*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 20 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - IF (NOUNIT) X(J) = X(J)/AP(KK) - TEMP = X(J) - K = KK - 1 - DO 10 I = J - 1,1,-1 - X(I) = X(I) - TEMP*AP(K) - K = K - 1 - 10 CONTINUE - END IF - KK = KK - J - 20 CONTINUE - ELSE - JX = KX + (N-1)*INCX - DO 40 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - IF (NOUNIT) X(JX) = X(JX)/AP(KK) - TEMP = X(JX) - IX = JX - DO 30 K = KK - 1,KK - J + 1,-1 - IX = IX - INCX - X(IX) = X(IX) - TEMP*AP(K) - 30 CONTINUE - END IF - JX = JX - INCX - KK = KK - J - 40 CONTINUE - END IF - ELSE - KK = 1 - IF (INCX.EQ.1) THEN - DO 60 J = 1,N - IF (X(J).NE.ZERO) THEN - IF (NOUNIT) X(J) = X(J)/AP(KK) - TEMP = X(J) - K = KK + 1 - DO 50 I = J + 1,N - X(I) = X(I) - TEMP*AP(K) - K = K + 1 - 50 CONTINUE - END IF - KK = KK + (N-J+1) - 60 CONTINUE - ELSE - JX = KX - DO 80 J = 1,N - IF (X(JX).NE.ZERO) THEN - IF (NOUNIT) X(JX) = X(JX)/AP(KK) - TEMP = X(JX) - IX = JX - DO 70 K = KK + 1,KK + N - J - IX = IX + INCX - X(IX) = X(IX) - TEMP*AP(K) - 70 CONTINUE - END IF - JX = JX + INCX - KK = KK + (N-J+1) - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = 1 - IF (INCX.EQ.1) THEN - DO 110 J = 1,N - TEMP = X(J) - K = KK - IF (NOCONJ) THEN - DO 90 I = 1,J - 1 - TEMP = TEMP - AP(K)*X(I) - K = K + 1 - 90 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK+J-1) - ELSE - DO 100 I = 1,J - 1 - TEMP = TEMP - CONJG(AP(K))*X(I) - K = K + 1 - 100 CONTINUE - IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK+J-1)) - END IF - X(J) = TEMP - KK = KK + J - 110 CONTINUE - ELSE - JX = KX - DO 140 J = 1,N - TEMP = X(JX) - IX = KX - IF (NOCONJ) THEN - DO 120 K = KK,KK + J - 2 - TEMP = TEMP - AP(K)*X(IX) - IX = IX + INCX - 120 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK+J-1) - ELSE - DO 130 K = KK,KK + J - 2 - TEMP = TEMP - CONJG(AP(K))*X(IX) - IX = IX + INCX - 130 CONTINUE - IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK+J-1)) - END IF - X(JX) = TEMP - JX = JX + INCX - KK = KK + J - 140 CONTINUE - END IF - ELSE - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 170 J = N,1,-1 - TEMP = X(J) - K = KK - IF (NOCONJ) THEN - DO 150 I = N,J + 1,-1 - TEMP = TEMP - AP(K)*X(I) - K = K - 1 - 150 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK-N+J) - ELSE - DO 160 I = N,J + 1,-1 - TEMP = TEMP - CONJG(AP(K))*X(I) - K = K - 1 - 160 CONTINUE - IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK-N+J)) - END IF - X(J) = TEMP - KK = KK - (N-J+1) - 170 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 200 J = N,1,-1 - TEMP = X(JX) - IX = KX - IF (NOCONJ) THEN - DO 180 K = KK,KK - (N- (J+1)),-1 - TEMP = TEMP - AP(K)*X(IX) - IX = IX - INCX - 180 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK-N+J) - ELSE - DO 190 K = KK,KK - (N- (J+1)),-1 - TEMP = TEMP - CONJG(AP(K))*X(IX) - IX = IX - INCX - 190 CONTINUE - IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK-N+J)) - END IF - X(JX) = TEMP - JX = JX - INCX - KK = KK - (N-J+1) - 200 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of CTPSV . -* - END diff --git a/blas/dtpsv.f b/blas/dtpsv.f deleted file mode 100644 index c7e58d32f..000000000 --- a/blas/dtpsv.f +++ /dev/null @@ -1,296 +0,0 @@ - SUBROUTINE DTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - DOUBLE PRECISION AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* DTPSV solves one of the systems of equations -* -* A*x = b, or A'*x = b, -* -* where b and x are n element vectors and A is an n by n unit, or -* non-unit, upper or lower triangular matrix, supplied in packed form. -* -* No test for singularity or near-singularity is included in this -* routine. Such tests must be performed before calling this routine. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the equations to be solved as -* follows: -* -* TRANS = 'N' or 'n' A*x = b. -* -* TRANS = 'T' or 't' A'*x = b. -* -* TRANS = 'C' or 'c' A'*x = b. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* AP - DOUBLE PRECISION array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) -* respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) -* respectively, and so on. -* Note that when DIAG = 'U' or 'u', the diagonal elements of -* A are not referenced, but are assumed to be unity. -* Unchanged on exit. -* -* X - DOUBLE PRECISION array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element right-hand side vector b. On exit, X is overwritten -* with the solution vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE PRECISION ZERO - PARAMETER (ZERO=0.0D+0) -* .. -* .. Local Scalars .. - DOUBLE PRECISION TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX - LOGICAL NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (INCX.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('DTPSV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of AP are -* accessed sequentially with one pass through AP. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x := inv( A )*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 20 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - IF (NOUNIT) X(J) = X(J)/AP(KK) - TEMP = X(J) - K = KK - 1 - DO 10 I = J - 1,1,-1 - X(I) = X(I) - TEMP*AP(K) - K = K - 1 - 10 CONTINUE - END IF - KK = KK - J - 20 CONTINUE - ELSE - JX = KX + (N-1)*INCX - DO 40 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - IF (NOUNIT) X(JX) = X(JX)/AP(KK) - TEMP = X(JX) - IX = JX - DO 30 K = KK - 1,KK - J + 1,-1 - IX = IX - INCX - X(IX) = X(IX) - TEMP*AP(K) - 30 CONTINUE - END IF - JX = JX - INCX - KK = KK - J - 40 CONTINUE - END IF - ELSE - KK = 1 - IF (INCX.EQ.1) THEN - DO 60 J = 1,N - IF (X(J).NE.ZERO) THEN - IF (NOUNIT) X(J) = X(J)/AP(KK) - TEMP = X(J) - K = KK + 1 - DO 50 I = J + 1,N - X(I) = X(I) - TEMP*AP(K) - K = K + 1 - 50 CONTINUE - END IF - KK = KK + (N-J+1) - 60 CONTINUE - ELSE - JX = KX - DO 80 J = 1,N - IF (X(JX).NE.ZERO) THEN - IF (NOUNIT) X(JX) = X(JX)/AP(KK) - TEMP = X(JX) - IX = JX - DO 70 K = KK + 1,KK + N - J - IX = IX + INCX - X(IX) = X(IX) - TEMP*AP(K) - 70 CONTINUE - END IF - JX = JX + INCX - KK = KK + (N-J+1) - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := inv( A' )*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = 1 - IF (INCX.EQ.1) THEN - DO 100 J = 1,N - TEMP = X(J) - K = KK - DO 90 I = 1,J - 1 - TEMP = TEMP - AP(K)*X(I) - K = K + 1 - 90 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK+J-1) - X(J) = TEMP - KK = KK + J - 100 CONTINUE - ELSE - JX = KX - DO 120 J = 1,N - TEMP = X(JX) - IX = KX - DO 110 K = KK,KK + J - 2 - TEMP = TEMP - AP(K)*X(IX) - IX = IX + INCX - 110 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK+J-1) - X(JX) = TEMP - JX = JX + INCX - KK = KK + J - 120 CONTINUE - END IF - ELSE - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 140 J = N,1,-1 - TEMP = X(J) - K = KK - DO 130 I = N,J + 1,-1 - TEMP = TEMP - AP(K)*X(I) - K = K - 1 - 130 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK-N+J) - X(J) = TEMP - KK = KK - (N-J+1) - 140 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 160 J = N,1,-1 - TEMP = X(JX) - IX = KX - DO 150 K = KK,KK - (N- (J+1)),-1 - TEMP = TEMP - AP(K)*X(IX) - IX = IX - INCX - 150 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK-N+J) - X(JX) = TEMP - JX = JX - INCX - KK = KK - (N-J+1) - 160 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of DTPSV . -* - END diff --git a/blas/level2_impl.h b/blas/level2_impl.h index 997ad016f..bd41f7e60 100644 --- a/blas/level2_impl.h +++ b/blas/level2_impl.h @@ -470,8 +470,55 @@ int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. */ -// int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *trans, char *diag, int *n, RealScalar *ap, RealScalar *x, int *incx) -// { -// return 1; -// } +int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) +{ + typedef void (*functype)(int, const Scalar*, Scalar*); + static functype func[16]; + + static bool init = false; + if(!init) + { + for(int k=0; k<16; ++k) + func[k] = 0; + + func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); + func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); + func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); + + func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); + func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); + func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); + + func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); + func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); + func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); + + func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); + func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); + func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); + + init = true; + } + + Scalar* ap = reinterpret_cast(pap); + Scalar* x = reinterpret_cast(px); + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(OP(*opa)==INVALID) info = 2; + else if(DIAG(*diag)==INVALID) info = 3; + else if(*n<0) info = 4; + else if(*incx==0) info = 7; + if(info) + return xerbla_(SCALAR_SUFFIX_UP"TPSV ",&info,6); + + Scalar* actual_x = get_compact_vector(x,*n,*incx); + + int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3); + func[code](*n, ap, actual_x); + + if(actual_x!=x) delete[] copy_back(actual_x,x,*n,*incx); + + return 1; +} diff --git a/blas/stpsv.f b/blas/stpsv.f deleted file mode 100644 index 7d95efbde..000000000 --- a/blas/stpsv.f +++ /dev/null @@ -1,296 +0,0 @@ - SUBROUTINE STPSV(UPLO,TRANS,DIAG,N,AP,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - REAL AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* STPSV solves one of the systems of equations -* -* A*x = b, or A'*x = b, -* -* where b and x are n element vectors and A is an n by n unit, or -* non-unit, upper or lower triangular matrix, supplied in packed form. -* -* No test for singularity or near-singularity is included in this -* routine. Such tests must be performed before calling this routine. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the equations to be solved as -* follows: -* -* TRANS = 'N' or 'n' A*x = b. -* -* TRANS = 'T' or 't' A'*x = b. -* -* TRANS = 'C' or 'c' A'*x = b. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* AP - REAL array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) -* respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) -* respectively, and so on. -* Note that when DIAG = 'U' or 'u', the diagonal elements of -* A are not referenced, but are assumed to be unity. -* Unchanged on exit. -* -* X - REAL array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element right-hand side vector b. On exit, X is overwritten -* with the solution vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - REAL ZERO - PARAMETER (ZERO=0.0E+0) -* .. -* .. Local Scalars .. - REAL TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX - LOGICAL NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (INCX.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('STPSV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of AP are -* accessed sequentially with one pass through AP. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x := inv( A )*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 20 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - IF (NOUNIT) X(J) = X(J)/AP(KK) - TEMP = X(J) - K = KK - 1 - DO 10 I = J - 1,1,-1 - X(I) = X(I) - TEMP*AP(K) - K = K - 1 - 10 CONTINUE - END IF - KK = KK - J - 20 CONTINUE - ELSE - JX = KX + (N-1)*INCX - DO 40 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - IF (NOUNIT) X(JX) = X(JX)/AP(KK) - TEMP = X(JX) - IX = JX - DO 30 K = KK - 1,KK - J + 1,-1 - IX = IX - INCX - X(IX) = X(IX) - TEMP*AP(K) - 30 CONTINUE - END IF - JX = JX - INCX - KK = KK - J - 40 CONTINUE - END IF - ELSE - KK = 1 - IF (INCX.EQ.1) THEN - DO 60 J = 1,N - IF (X(J).NE.ZERO) THEN - IF (NOUNIT) X(J) = X(J)/AP(KK) - TEMP = X(J) - K = KK + 1 - DO 50 I = J + 1,N - X(I) = X(I) - TEMP*AP(K) - K = K + 1 - 50 CONTINUE - END IF - KK = KK + (N-J+1) - 60 CONTINUE - ELSE - JX = KX - DO 80 J = 1,N - IF (X(JX).NE.ZERO) THEN - IF (NOUNIT) X(JX) = X(JX)/AP(KK) - TEMP = X(JX) - IX = JX - DO 70 K = KK + 1,KK + N - J - IX = IX + INCX - X(IX) = X(IX) - TEMP*AP(K) - 70 CONTINUE - END IF - JX = JX + INCX - KK = KK + (N-J+1) - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := inv( A' )*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = 1 - IF (INCX.EQ.1) THEN - DO 100 J = 1,N - TEMP = X(J) - K = KK - DO 90 I = 1,J - 1 - TEMP = TEMP - AP(K)*X(I) - K = K + 1 - 90 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK+J-1) - X(J) = TEMP - KK = KK + J - 100 CONTINUE - ELSE - JX = KX - DO 120 J = 1,N - TEMP = X(JX) - IX = KX - DO 110 K = KK,KK + J - 2 - TEMP = TEMP - AP(K)*X(IX) - IX = IX + INCX - 110 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK+J-1) - X(JX) = TEMP - JX = JX + INCX - KK = KK + J - 120 CONTINUE - END IF - ELSE - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 140 J = N,1,-1 - TEMP = X(J) - K = KK - DO 130 I = N,J + 1,-1 - TEMP = TEMP - AP(K)*X(I) - K = K - 1 - 130 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK-N+J) - X(J) = TEMP - KK = KK - (N-J+1) - 140 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 160 J = N,1,-1 - TEMP = X(JX) - IX = KX - DO 150 K = KK,KK - (N- (J+1)),-1 - TEMP = TEMP - AP(K)*X(IX) - IX = IX - INCX - 150 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK-N+J) - X(JX) = TEMP - JX = JX - INCX - KK = KK - (N-J+1) - 160 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of STPSV . -* - END diff --git a/blas/ztpsv.f b/blas/ztpsv.f deleted file mode 100644 index b56e1d8c4..000000000 --- a/blas/ztpsv.f +++ /dev/null @@ -1,332 +0,0 @@ - SUBROUTINE ZTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - DOUBLE COMPLEX AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* ZTPSV solves one of the systems of equations -* -* A*x = b, or A'*x = b, or conjg( A' )*x = b, -* -* where b and x are n element vectors and A is an n by n unit, or -* non-unit, upper or lower triangular matrix, supplied in packed form. -* -* No test for singularity or near-singularity is included in this -* routine. Such tests must be performed before calling this routine. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the equations to be solved as -* follows: -* -* TRANS = 'N' or 'n' A*x = b. -* -* TRANS = 'T' or 't' A'*x = b. -* -* TRANS = 'C' or 'c' conjg( A' )*x = b. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* AP - COMPLEX*16 array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) -* respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular matrix packed sequentially, -* column by column, so that AP( 1 ) contains a( 1, 1 ), -* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) -* respectively, and so on. -* Note that when DIAG = 'U' or 'u', the diagonal elements of -* A are not referenced, but are assumed to be unity. -* Unchanged on exit. -* -* X - COMPLEX*16 array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element right-hand side vector b. On exit, X is overwritten -* with the solution vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE COMPLEX ZERO - PARAMETER (ZERO= (0.0D+0,0.0D+0)) -* .. -* .. Local Scalars .. - DOUBLE COMPLEX TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX - LOGICAL NOCONJ,NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC DCONJG -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (INCX.EQ.0) THEN - INFO = 7 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('ZTPSV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOCONJ = LSAME(TRANS,'T') - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of AP are -* accessed sequentially with one pass through AP. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x := inv( A )*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 20 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - IF (NOUNIT) X(J) = X(J)/AP(KK) - TEMP = X(J) - K = KK - 1 - DO 10 I = J - 1,1,-1 - X(I) = X(I) - TEMP*AP(K) - K = K - 1 - 10 CONTINUE - END IF - KK = KK - J - 20 CONTINUE - ELSE - JX = KX + (N-1)*INCX - DO 40 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - IF (NOUNIT) X(JX) = X(JX)/AP(KK) - TEMP = X(JX) - IX = JX - DO 30 K = KK - 1,KK - J + 1,-1 - IX = IX - INCX - X(IX) = X(IX) - TEMP*AP(K) - 30 CONTINUE - END IF - JX = JX - INCX - KK = KK - J - 40 CONTINUE - END IF - ELSE - KK = 1 - IF (INCX.EQ.1) THEN - DO 60 J = 1,N - IF (X(J).NE.ZERO) THEN - IF (NOUNIT) X(J) = X(J)/AP(KK) - TEMP = X(J) - K = KK + 1 - DO 50 I = J + 1,N - X(I) = X(I) - TEMP*AP(K) - K = K + 1 - 50 CONTINUE - END IF - KK = KK + (N-J+1) - 60 CONTINUE - ELSE - JX = KX - DO 80 J = 1,N - IF (X(JX).NE.ZERO) THEN - IF (NOUNIT) X(JX) = X(JX)/AP(KK) - TEMP = X(JX) - IX = JX - DO 70 K = KK + 1,KK + N - J - IX = IX + INCX - X(IX) = X(IX) - TEMP*AP(K) - 70 CONTINUE - END IF - JX = JX + INCX - KK = KK + (N-J+1) - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. -* - IF (LSAME(UPLO,'U')) THEN - KK = 1 - IF (INCX.EQ.1) THEN - DO 110 J = 1,N - TEMP = X(J) - K = KK - IF (NOCONJ) THEN - DO 90 I = 1,J - 1 - TEMP = TEMP - AP(K)*X(I) - K = K + 1 - 90 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK+J-1) - ELSE - DO 100 I = 1,J - 1 - TEMP = TEMP - DCONJG(AP(K))*X(I) - K = K + 1 - 100 CONTINUE - IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK+J-1)) - END IF - X(J) = TEMP - KK = KK + J - 110 CONTINUE - ELSE - JX = KX - DO 140 J = 1,N - TEMP = X(JX) - IX = KX - IF (NOCONJ) THEN - DO 120 K = KK,KK + J - 2 - TEMP = TEMP - AP(K)*X(IX) - IX = IX + INCX - 120 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK+J-1) - ELSE - DO 130 K = KK,KK + J - 2 - TEMP = TEMP - DCONJG(AP(K))*X(IX) - IX = IX + INCX - 130 CONTINUE - IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK+J-1)) - END IF - X(JX) = TEMP - JX = JX + INCX - KK = KK + J - 140 CONTINUE - END IF - ELSE - KK = (N* (N+1))/2 - IF (INCX.EQ.1) THEN - DO 170 J = N,1,-1 - TEMP = X(J) - K = KK - IF (NOCONJ) THEN - DO 150 I = N,J + 1,-1 - TEMP = TEMP - AP(K)*X(I) - K = K - 1 - 150 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK-N+J) - ELSE - DO 160 I = N,J + 1,-1 - TEMP = TEMP - DCONJG(AP(K))*X(I) - K = K - 1 - 160 CONTINUE - IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK-N+J)) - END IF - X(J) = TEMP - KK = KK - (N-J+1) - 170 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 200 J = N,1,-1 - TEMP = X(JX) - IX = KX - IF (NOCONJ) THEN - DO 180 K = KK,KK - (N- (J+1)),-1 - TEMP = TEMP - AP(K)*X(IX) - IX = IX - INCX - 180 CONTINUE - IF (NOUNIT) TEMP = TEMP/AP(KK-N+J) - ELSE - DO 190 K = KK,KK - (N- (J+1)),-1 - TEMP = TEMP - DCONJG(AP(K))*X(IX) - IX = IX - INCX - 190 CONTINUE - IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK-N+J)) - END IF - X(JX) = TEMP - JX = JX - INCX - KK = KK - (N-J+1) - 200 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of ZTPSV . -* - END -- cgit v1.2.3 From 04f315d692d4b2b01635981d34b44743ba63571c Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Mon, 10 Sep 2012 18:25:30 +0800 Subject: Fix rank-1 update for self-adjoint packed matrices. --- blas/CMakeLists.txt | 6 +- blas/PackedSelfadjointProduct.h | 19 ++-- blas/chpr.f | 220 ---------------------------------------- blas/dspr.f | 202 ------------------------------------ blas/level2_cplx_impl.h | 47 ++++++++- blas/sspr.f | 202 ------------------------------------ blas/zhpr.f | 220 ---------------------------------------- 7 files changed, 53 insertions(+), 863 deletions(-) delete mode 100644 blas/chpr.f delete mode 100644 blas/dspr.f delete mode 100644 blas/sspr.f delete mode 100644 blas/zhpr.f (limited to 'blas') diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index 3877e1285..c35a2fdbe 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -18,9 +18,9 @@ if(EIGEN_Fortran_COMPILER_WORKS) set(EigenBlas_SRCS ${EigenBlas_SRCS} complexdots.f srotm.f srotmg.f drotm.f drotmg.f - lsame.f dspmv.f ssbmv.f - chbmv.f chpr.f sspmv.f - zhbmv.f zhpr.f chpmv.f dsbmv.f + lsame.f dspmv.f ssbmv.f + chbmv.f sspmv.f + zhbmv.f chpmv.f dsbmv.f zhpmv.f dtbmv.f stbmv.f ctbmv.f ztbmv.f ) diff --git a/blas/PackedSelfadjointProduct.h b/blas/PackedSelfadjointProduct.h index 1ba67b9c1..f7c9b9341 100644 --- a/blas/PackedSelfadjointProduct.h +++ b/blas/PackedSelfadjointProduct.h @@ -14,12 +14,6 @@ namespace internal { /* Optimized matrix += alpha * uv' * The matrix is in packed form. - * - * FIXME I always fail tests for complex self-adjoint matrices. - * - * ******* FATAL ERROR - PARAMETER NUMBER 6 WAS CHANGED INCORRECTLY ******* - * ******* xHPR FAILED ON CALL NUMBER: - * 2: xHPR ('U', 1, 0.0, X, 1, AP) */ template struct selfadjoint_packed_rank1_update; @@ -27,20 +21,20 @@ struct selfadjoint_packed_rank1_update; template struct selfadjoint_packed_rank1_update { - static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) + typedef typename NumTraits::Real RealScalar; + static void run(Index size, Scalar* mat, const Scalar* vec, RealScalar alpha) { typedef Map > OtherMap; typedef typename conj_expr_if::type ConjRhsType; conj_if cj; - Index offset = 0; for (Index i=0; i >(mat+offset, UpLo==Lower ? size-i : (i+1)) + Map >(mat, UpLo==Lower ? size-i : (i+1)) += alpha * cj(vec[i]) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1))); //FIXME This should be handled outside. - mat[offset+(UpLo==Lower ? 0 : i)] = real(mat[offset+(UpLo==Lower ? 0 : i)]); - offset += UpLo==Lower ? size-i : (i+1); + mat[UpLo==Lower ? 0 : i] = real(mat[UpLo==Lower ? 0 : i]); + mat += UpLo==Lower ? size-i : (i+1); } } }; @@ -48,7 +42,8 @@ struct selfadjoint_packed_rank1_update struct selfadjoint_packed_rank1_update { - static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) + typedef typename NumTraits::Real RealScalar; + static void run(Index size, Scalar* mat, const Scalar* vec, RealScalar alpha) { selfadjoint_packed_rank1_update::run(size,mat,vec,alpha); } diff --git a/blas/chpr.f b/blas/chpr.f deleted file mode 100644 index 11bd5c6ee..000000000 --- a/blas/chpr.f +++ /dev/null @@ -1,220 +0,0 @@ - SUBROUTINE CHPR(UPLO,N,ALPHA,X,INCX,AP) -* .. Scalar Arguments .. - REAL ALPHA - INTEGER INCX,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - COMPLEX AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* CHPR performs the hermitian rank 1 operation -* -* A := alpha*x*conjg( x' ) + A, -* -* where alpha is a real scalar, x is an n element vector and A is an -* n by n hermitian matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - REAL . -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* X - COMPLEX array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* AP - COMPLEX array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. On exit, the array -* AP is overwritten by the upper triangular part of the -* updated matrix. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. On exit, the array -* AP is overwritten by the lower triangular part of the -* updated matrix. -* Note that the imaginary parts of the diagonal elements need -* not be set, they are assumed to be zero, and on exit they -* are set to zero. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - COMPLEX ZERO - PARAMETER (ZERO= (0.0E+0,0.0E+0)) -* .. -* .. Local Scalars .. - COMPLEX TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC CONJG,REAL -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 5 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('CHPR ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. (ALPHA.EQ.REAL(ZERO))) RETURN -* -* Set the start point in X if the increment is not unity. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form A when upper triangle is stored in AP. -* - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = ALPHA*CONJG(X(J)) - K = KK - DO 10 I = 1,J - 1 - AP(K) = AP(K) + X(I)*TEMP - K = K + 1 - 10 CONTINUE - AP(KK+J-1) = REAL(AP(KK+J-1)) + REAL(X(J)*TEMP) - ELSE - AP(KK+J-1) = REAL(AP(KK+J-1)) - END IF - KK = KK + J - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = ALPHA*CONJG(X(JX)) - IX = KX - DO 30 K = KK,KK + J - 2 - AP(K) = AP(K) + X(IX)*TEMP - IX = IX + INCX - 30 CONTINUE - AP(KK+J-1) = REAL(AP(KK+J-1)) + REAL(X(JX)*TEMP) - ELSE - AP(KK+J-1) = REAL(AP(KK+J-1)) - END IF - JX = JX + INCX - KK = KK + J - 40 CONTINUE - END IF - ELSE -* -* Form A when lower triangle is stored in AP. -* - IF (INCX.EQ.1) THEN - DO 60 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = ALPHA*CONJG(X(J)) - AP(KK) = REAL(AP(KK)) + REAL(TEMP*X(J)) - K = KK + 1 - DO 50 I = J + 1,N - AP(K) = AP(K) + X(I)*TEMP - K = K + 1 - 50 CONTINUE - ELSE - AP(KK) = REAL(AP(KK)) - END IF - KK = KK + N - J + 1 - 60 CONTINUE - ELSE - JX = KX - DO 80 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = ALPHA*CONJG(X(JX)) - AP(KK) = REAL(AP(KK)) + REAL(TEMP*X(JX)) - IX = JX - DO 70 K = KK + 1,KK + N - J - IX = IX + INCX - AP(K) = AP(K) + X(IX)*TEMP - 70 CONTINUE - ELSE - AP(KK) = REAL(AP(KK)) - END IF - JX = JX + INCX - KK = KK + N - J + 1 - 80 CONTINUE - END IF - END IF -* - RETURN -* -* End of CHPR . -* - END diff --git a/blas/dspr.f b/blas/dspr.f deleted file mode 100644 index 538e4f76b..000000000 --- a/blas/dspr.f +++ /dev/null @@ -1,202 +0,0 @@ - SUBROUTINE DSPR(UPLO,N,ALPHA,X,INCX,AP) -* .. Scalar Arguments .. - DOUBLE PRECISION ALPHA - INTEGER INCX,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - DOUBLE PRECISION AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* DSPR performs the symmetric rank 1 operation -* -* A := alpha*x*x' + A, -* -* where alpha is a real scalar, x is an n element vector and A is an -* n by n symmetric matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - DOUBLE PRECISION. -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* X - DOUBLE PRECISION array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* AP - DOUBLE PRECISION array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the symmetric matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. On exit, the array -* AP is overwritten by the upper triangular part of the -* updated matrix. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the symmetric matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. On exit, the array -* AP is overwritten by the lower triangular part of the -* updated matrix. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE PRECISION ZERO - PARAMETER (ZERO=0.0D+0) -* .. -* .. Local Scalars .. - DOUBLE PRECISION TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 5 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('DSPR ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN -* -* Set the start point in X if the increment is not unity. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form A when upper triangle is stored in AP. -* - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = ALPHA*X(J) - K = KK - DO 10 I = 1,J - AP(K) = AP(K) + X(I)*TEMP - K = K + 1 - 10 CONTINUE - END IF - KK = KK + J - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = ALPHA*X(JX) - IX = KX - DO 30 K = KK,KK + J - 1 - AP(K) = AP(K) + X(IX)*TEMP - IX = IX + INCX - 30 CONTINUE - END IF - JX = JX + INCX - KK = KK + J - 40 CONTINUE - END IF - ELSE -* -* Form A when lower triangle is stored in AP. -* - IF (INCX.EQ.1) THEN - DO 60 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = ALPHA*X(J) - K = KK - DO 50 I = J,N - AP(K) = AP(K) + X(I)*TEMP - K = K + 1 - 50 CONTINUE - END IF - KK = KK + N - J + 1 - 60 CONTINUE - ELSE - JX = KX - DO 80 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = ALPHA*X(JX) - IX = JX - DO 70 K = KK,KK + N - J - AP(K) = AP(K) + X(IX)*TEMP - IX = IX + INCX - 70 CONTINUE - END IF - JX = JX + INCX - KK = KK + N - J + 1 - 80 CONTINUE - END IF - END IF -* - RETURN -* -* End of DSPR . -* - END diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 11ee13b4c..f52d384a9 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -108,10 +108,49 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa * where alpha is a real scalar, x is an n element vector and A is an * n by n hermitian matrix, supplied in packed form. */ -// int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *alpha, RealScalar *x, int *incx, RealScalar *ap) -// { -// return 1; -// } +int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap) +{ + typedef void (*functype)(int, Scalar*, const Scalar*, RealScalar); + static functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + + func[UP] = (internal::selfadjoint_packed_rank1_update::run); + func[LO] = (internal::selfadjoint_packed_rank1_update::run); + + init = true; + } + + Scalar* x = reinterpret_cast(px); + Scalar* ap = reinterpret_cast(pap); + RealScalar alpha = *palpha; + + int info = 0; + if(UPLO(*uplo)==INVALID) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + if(info) + return xerbla_(SCALAR_SUFFIX_UP"HPR ",&info,6); + + if(alpha==Scalar(0)) + return 1; + + Scalar* x_cpy = get_compact_vector(x, *n, *incx); + + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; + + func[code](*n, ap, x_cpy, alpha); + + if(x_cpy!=x) delete[] x_cpy; + + return 1; +} /** ZHPR2 performs the hermitian rank 2 operation * diff --git a/blas/sspr.f b/blas/sspr.f deleted file mode 100644 index bae92612e..000000000 --- a/blas/sspr.f +++ /dev/null @@ -1,202 +0,0 @@ - SUBROUTINE SSPR(UPLO,N,ALPHA,X,INCX,AP) -* .. Scalar Arguments .. - REAL ALPHA - INTEGER INCX,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - REAL AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* SSPR performs the symmetric rank 1 operation -* -* A := alpha*x*x' + A, -* -* where alpha is a real scalar, x is an n element vector and A is an -* n by n symmetric matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - REAL . -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* X - REAL array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* AP - REAL array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the symmetric matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. On exit, the array -* AP is overwritten by the upper triangular part of the -* updated matrix. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the symmetric matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. On exit, the array -* AP is overwritten by the lower triangular part of the -* updated matrix. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - REAL ZERO - PARAMETER (ZERO=0.0E+0) -* .. -* .. Local Scalars .. - REAL TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 5 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('SSPR ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN -* -* Set the start point in X if the increment is not unity. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form A when upper triangle is stored in AP. -* - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = ALPHA*X(J) - K = KK - DO 10 I = 1,J - AP(K) = AP(K) + X(I)*TEMP - K = K + 1 - 10 CONTINUE - END IF - KK = KK + J - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = ALPHA*X(JX) - IX = KX - DO 30 K = KK,KK + J - 1 - AP(K) = AP(K) + X(IX)*TEMP - IX = IX + INCX - 30 CONTINUE - END IF - JX = JX + INCX - KK = KK + J - 40 CONTINUE - END IF - ELSE -* -* Form A when lower triangle is stored in AP. -* - IF (INCX.EQ.1) THEN - DO 60 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = ALPHA*X(J) - K = KK - DO 50 I = J,N - AP(K) = AP(K) + X(I)*TEMP - K = K + 1 - 50 CONTINUE - END IF - KK = KK + N - J + 1 - 60 CONTINUE - ELSE - JX = KX - DO 80 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = ALPHA*X(JX) - IX = JX - DO 70 K = KK,KK + N - J - AP(K) = AP(K) + X(IX)*TEMP - IX = IX + INCX - 70 CONTINUE - END IF - JX = JX + INCX - KK = KK + N - J + 1 - 80 CONTINUE - END IF - END IF -* - RETURN -* -* End of SSPR . -* - END diff --git a/blas/zhpr.f b/blas/zhpr.f deleted file mode 100644 index 40efbc7d5..000000000 --- a/blas/zhpr.f +++ /dev/null @@ -1,220 +0,0 @@ - SUBROUTINE ZHPR(UPLO,N,ALPHA,X,INCX,AP) -* .. Scalar Arguments .. - DOUBLE PRECISION ALPHA - INTEGER INCX,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - DOUBLE COMPLEX AP(*),X(*) -* .. -* -* Purpose -* ======= -* -* ZHPR performs the hermitian rank 1 operation -* -* A := alpha*x*conjg( x' ) + A, -* -* where alpha is a real scalar, x is an n element vector and A is an -* n by n hermitian matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - DOUBLE PRECISION. -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* X - COMPLEX*16 array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* AP - COMPLEX*16 array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. On exit, the array -* AP is overwritten by the upper triangular part of the -* updated matrix. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. On exit, the array -* AP is overwritten by the lower triangular part of the -* updated matrix. -* Note that the imaginary parts of the diagonal elements need -* not be set, they are assumed to be zero, and on exit they -* are set to zero. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE COMPLEX ZERO - PARAMETER (ZERO= (0.0D+0,0.0D+0)) -* .. -* .. Local Scalars .. - DOUBLE COMPLEX TEMP - INTEGER I,INFO,IX,J,JX,K,KK,KX -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC DBLE,DCONJG -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 5 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('ZHPR ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. (ALPHA.EQ.DBLE(ZERO))) RETURN -* -* Set the start point in X if the increment is not unity. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form A when upper triangle is stored in AP. -* - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = ALPHA*DCONJG(X(J)) - K = KK - DO 10 I = 1,J - 1 - AP(K) = AP(K) + X(I)*TEMP - K = K + 1 - 10 CONTINUE - AP(KK+J-1) = DBLE(AP(KK+J-1)) + DBLE(X(J)*TEMP) - ELSE - AP(KK+J-1) = DBLE(AP(KK+J-1)) - END IF - KK = KK + J - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = ALPHA*DCONJG(X(JX)) - IX = KX - DO 30 K = KK,KK + J - 2 - AP(K) = AP(K) + X(IX)*TEMP - IX = IX + INCX - 30 CONTINUE - AP(KK+J-1) = DBLE(AP(KK+J-1)) + DBLE(X(JX)*TEMP) - ELSE - AP(KK+J-1) = DBLE(AP(KK+J-1)) - END IF - JX = JX + INCX - KK = KK + J - 40 CONTINUE - END IF - ELSE -* -* Form A when lower triangle is stored in AP. -* - IF (INCX.EQ.1) THEN - DO 60 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = ALPHA*DCONJG(X(J)) - AP(KK) = DBLE(AP(KK)) + DBLE(TEMP*X(J)) - K = KK + 1 - DO 50 I = J + 1,N - AP(K) = AP(K) + X(I)*TEMP - K = K + 1 - 50 CONTINUE - ELSE - AP(KK) = DBLE(AP(KK)) - END IF - KK = KK + N - J + 1 - 60 CONTINUE - ELSE - JX = KX - DO 80 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = ALPHA*DCONJG(X(JX)) - AP(KK) = DBLE(AP(KK)) + DBLE(TEMP*X(JX)) - IX = JX - DO 70 K = KK + 1,KK + N - J - IX = IX + INCX - AP(K) = AP(K) + X(IX)*TEMP - 70 CONTINUE - ELSE - AP(KK) = DBLE(AP(KK)) - END IF - JX = JX + INCX - KK = KK + N - J + 1 - 80 CONTINUE - END IF - END IF -* - RETURN -* -* End of ZHPR . -* - END -- cgit v1.2.3