From b0b9b4d6b223cfbfee564427514cd6d693e1c503 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 8 Sep 2012 01:39:16 +0800 Subject: Implement functors for rank-1 and rank-2 update. --- blas/GeneralRank1Update.h | 30 ++++++++++++++++ blas/Rank2Update.h | 57 ++++++++++++++++++++++++++++++ blas/common.h | 2 ++ blas/level2_cplx_impl.h | 57 +++++++++++++++++++++--------- blas/level2_impl.h | 46 +++--------------------- blas/level2_real_impl.h | 89 +++++++++++++++++++++++++++++++++++++++-------- blas/level3_impl.h | 2 ++ 7 files changed, 212 insertions(+), 71 deletions(-) create mode 100644 blas/GeneralRank1Update.h create mode 100644 blas/Rank2Update.h (limited to 'blas') diff --git a/blas/GeneralRank1Update.h b/blas/GeneralRank1Update.h new file mode 100644 index 000000000..a3301ed92 --- /dev/null +++ b/blas/GeneralRank1Update.h @@ -0,0 +1,30 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GENERAL_RANK1UPDATE_H +#define EIGEN_GENERAL_RANK1UPDATE_H + +namespace internal { + +/* Optimized matrix += alpha * uv' */ +template +struct general_rank1_update +{ + static void run(Index rows, Index cols, Scalar* mat, Index stride, const Scalar* u, const Scalar* v, Scalar alpha) + { + typedef Matrix PlainVector; + internal::conj_if cj; + for (Index i=0; i(mat+stride*i,rows) += alpha * cj(v[i]) * Map(u,rows); + } +}; + +} // end namespace internal + +#endif // EIGEN_GENERAL_RANK1UPDATE_H diff --git a/blas/Rank2Update.h b/blas/Rank2Update.h new file mode 100644 index 000000000..e7a5eeaba --- /dev/null +++ b/blas/Rank2Update.h @@ -0,0 +1,57 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RANK2UPDATE_H +#define EIGEN_RANK2UPDATE_H + +namespace internal { + +/* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu' + * This is the low-level version of SelfadjointRank2Update.h + */ +template +struct rank2_update_selector; + +template +struct rank2_update_selector +{ + static void run(Index size, Scalar* mat, Index stride, const Scalar* _u, const Scalar* _v, Scalar alpha) + { + typedef Matrix PlainVector; + Map u(_u, size), v(_v, size); + + for (Index i=0; i(mat+stride*i, i+1) += + conj(alpha) * conj(_u[i]) * v.head(i+1) + + alpha * conj(_v[i]) * u.head(i+1); + } + } +}; + +template +struct rank2_update_selector +{ + static void run(Index size, Scalar* mat, Index stride, const Scalar* _u, const Scalar* _v, Scalar alpha) + { + typedef Matrix PlainVector; + Map u(_u, size), v(_v, size); + + for (Index i=0; i(mat+(stride+1)*i, size-i) += + conj(alpha) * conj(_u[i]) * v.tail(size-i) + + alpha * conj(_v[i]) * u.tail(size-i); + } + } +}; + +} // end namespace internal + +#endif // EIGEN_RANK2UPDATE_H diff --git a/blas/common.h b/blas/common.h index cd7839114..26b4ed5a3 100644 --- a/blas/common.h +++ b/blas/common.h @@ -74,6 +74,8 @@ inline bool check_uplo(const char* uplo) namespace Eigen { #include "BandTriangularSolver.h" +#include "GeneralRank1Update.h" +#include "Rank2Update.h" } using namespace Eigen; diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 7878f2a16..477f6d649 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -117,6 +117,21 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa */ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) { + typedef void (*functype)(int, Scalar*, int, const Scalar*, Scalar); + static functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + + func[UP] = (selfadjoint_rank1_update::run); + func[LO] = (selfadjoint_rank1_update::run); + + init = true; + } + Scalar* x = reinterpret_cast(px); Scalar* a = reinterpret_cast(pa); RealScalar alpha = *reinterpret_cast(palpha); @@ -134,16 +149,11 @@ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, Scalar* x_cpy = get_compact_vector(x, *n, *incx); - // TODO perform direct calls to underlying implementation -// if(UPLO(*uplo)==LO) matrix(a,*n,*n,*lda).selfadjointView().rankUpdate(vector(x_cpy,*n), alpha); -// else if(UPLO(*uplo)==UP) matrix(a,*n,*n,*lda).selfadjointView().rankUpdate(vector(x_cpy,*n), alpha); + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; - if(UPLO(*uplo)==LO) - for(int j=0;j<*n;++j) - matrix(a,*n,*n,*lda).col(j).tail(*n-j) += alpha * internal::conj(x_cpy[j]) * vector(x_cpy+j,*n-j); - else - for(int j=0;j<*n;++j) - matrix(a,*n,*n,*lda).col(j).head(j+1) += alpha * internal::conj(x_cpy[j]) * vector(x_cpy,j+1); + func[code](*n, a, *lda, x_cpy, alpha); matrix(a,*n,*n,*lda).diagonal().imag().setZero(); @@ -161,6 +171,21 @@ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, */ int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) { + typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); + static functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + + func[UP] = (internal::rank2_update_selector::run); + func[LO] = (internal::rank2_update_selector::run); + + init = true; + } + Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); Scalar* a = reinterpret_cast(pa); @@ -181,9 +206,11 @@ int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px Scalar* x_cpy = get_compact_vector(x, *n, *incx); Scalar* y_cpy = get_compact_vector(y, *n, *incy); - // TODO perform direct calls to underlying implementation - if(UPLO(*uplo)==LO) matrix(a,*n,*n,*lda).selfadjointView().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha); - else if(UPLO(*uplo)==UP) matrix(a,*n,*n,*lda).selfadjointView().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha); + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; + + func[code](*n, a, *lda, x_cpy, y_cpy, alpha); matrix(a,*n,*n,*lda).diagonal().imag().setZero(); @@ -222,8 +249,7 @@ int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, in Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - // TODO perform direct calls to underlying implementation - matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).transpose(); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; @@ -260,8 +286,7 @@ int EIGEN_BLAS_FUNC(gerc)(int *m, int *n, RealScalar *palpha, RealScalar *px, in Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - // TODO perform direct calls to underlying implementation - matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).adjoint(); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; diff --git a/blas/level2_impl.h b/blas/level2_impl.h index 7099cf96d..f1f7371ee 100644 --- a/blas/level2_impl.h +++ b/blas/level2_impl.h @@ -49,7 +49,8 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca int actual_m = *m; int actual_n = *n; - if(OP(*opa)!=NOTR) + int code = OP(*opa); + if(code!=NOTR) std::swap(actual_m,actual_n); Scalar* actual_b = get_compact_vector(b,actual_n,*incb); @@ -61,7 +62,9 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca else vector(actual_c, actual_m) *= beta; } - int code = OP(*opa); + if(code>=4 || func[code]==0) + return 0; + func[code](actual_m, actual_n, a, *lda, actual_b, 1, actual_c, 1, alpha); if(actual_b!=b) delete[] actual_b; @@ -416,42 +419,3 @@ int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, Real // return 1; // } -/** DGER performs the rank 1 operation - * - * A := alpha*x*y' + A, - * - * where alpha is a scalar, x is an m element vector, y is an n element - * vector and A is an m by n matrix. - */ -int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *pa, int *lda) -{ - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); - Scalar* a = reinterpret_cast(pa); - Scalar alpha = *reinterpret_cast(palpha); - - int info = 0; - if(*m<0) info = 1; - else if(*n<0) info = 2; - else if(*incx==0) info = 5; - else if(*incy==0) info = 7; - else if(*lda::run); + func[LO] = (selfadjoint_rank1_update::run); + + init = true; + } Scalar* x = reinterpret_cast(px); Scalar* c = reinterpret_cast(pc); @@ -86,18 +100,11 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, // if the increment is not 1, let's copy it to a temporary vector to enable vectorization Scalar* x_cpy = get_compact_vector(x,*n,*incx); - Matrix m2(matrix(c,*n,*n,*ldc)); - - // TODO check why this is not accurate enough for lapack tests -// if(UPLO(*uplo)==LO) matrix(c,*n,*n,*ldc).selfadjointView().rankUpdate(vector(x_cpy,*n), alpha); -// else if(UPLO(*uplo)==UP) matrix(c,*n,*n,*ldc).selfadjointView().rankUpdate(vector(x_cpy,*n), alpha); + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; - if(UPLO(*uplo)==LO) - for(int j=0;j<*n;++j) - matrix(c,*n,*n,*ldc).col(j).tail(*n-j) += alpha * x_cpy[j] * vector(x_cpy+j,*n-j); - else - for(int j=0;j<*n;++j) - matrix(c,*n,*n,*ldc).col(j).head(j+1) += alpha * x_cpy[j] * vector(x_cpy,j+1); + func[code](*n, c, *ldc, x_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; @@ -121,6 +128,20 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px // // init = true; // } + typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); + static functype func[2]; + + static bool init = false; + if(!init) + { + for(int k=0; k<2; ++k) + func[k] = 0; + + func[UP] = (internal::rank2_update_selector::run); + func[LO] = (internal::rank2_update_selector::run); + + init = true; + } Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); @@ -141,10 +162,12 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px Scalar* x_cpy = get_compact_vector(x,*n,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); + + int code = UPLO(*uplo); + if(code>=2 || func[code]==0) + return 0; - // TODO perform direct calls to underlying implementation - if(UPLO(*uplo)==LO) matrix(c,*n,*n,*ldc).selfadjointView().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha); - else if(UPLO(*uplo)==UP) matrix(c,*n,*n,*ldc).selfadjointView().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha); + func[code](*n, c, *ldc, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; @@ -208,3 +231,41 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px // return 1; // } +/** DGER performs the rank 1 operation + * + * A := alpha*x*y' + A, + * + * where alpha is a scalar, x is an m element vector, y is an n element + * vector and A is an m by n matrix. + */ +int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *pa, int *lda) +{ + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + Scalar* a = reinterpret_cast(pa); + Scalar alpha = *reinterpret_cast(palpha); + + int info = 0; + if(*m<0) info = 1; + else if(*n<0) info = 2; + else if(*incx==0) info = 5; + else if(*incy==0) info = 7; + else if(*lda::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); + + if(x_cpy!=x) delete[] x_cpy; + if(y_cpy!=y) delete[] y_cpy; + + return 1; +} + + diff --git a/blas/level3_impl.h b/blas/level3_impl.h index 2371f25c3..84c9f4f2b 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h @@ -305,6 +305,7 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) { // std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; + #if !ISCOMPLEX typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar); static functype func[8]; @@ -324,6 +325,7 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp init = true; } + #endif Scalar* a = reinterpret_cast(pa); Scalar* c = reinterpret_cast(pc); -- cgit v1.2.3