From 669db3d7768b3b94d31d6552a1012ee29f54b8d8 Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sun, 9 Sep 2012 02:55:10 +0800 Subject: Extend rank-1 updates for different storage orders. --- blas/GeneralRank1Update.h | 21 ++++++++++++--- blas/PackedSelfadjointProduct.h | 59 +++++++++++++++++++++++++++++++++++++++++ blas/SelfadjointPackedProduct.h | 47 -------------------------------- blas/common.h | 2 +- blas/level2_cplx_impl.h | 4 +-- blas/level2_real_impl.h | 6 ++--- 6 files changed, 82 insertions(+), 57 deletions(-) create mode 100644 blas/PackedSelfadjointProduct.h delete mode 100644 blas/SelfadjointPackedProduct.h (limited to 'blas') diff --git a/blas/GeneralRank1Update.h b/blas/GeneralRank1Update.h index a3301ed92..6d33fbcc1 100644 --- a/blas/GeneralRank1Update.h +++ b/blas/GeneralRank1Update.h @@ -13,15 +13,28 @@ namespace internal { /* Optimized matrix += alpha * uv' */ -template -struct general_rank1_update +template +struct general_rank1_update; + +template +struct general_rank1_update { static void run(Index rows, Index cols, Scalar* mat, Index stride, const Scalar* u, const Scalar* v, Scalar alpha) { - typedef Matrix PlainVector; internal::conj_if cj; + typedef Map > OtherMap; + typedef typename internal::conditional::type ConjRhsType; for (Index i=0; i(mat+stride*i,rows) += alpha * cj(v[i]) * Map(u,rows); + Map >(mat+stride*i,rows) += alpha * cj(v[i]) * ConjRhsType(OtherMap(u,rows)); + } +}; + +template +struct general_rank1_update +{ + static void run(Index rows, Index cols, Scalar* mat, Index stride, const Scalar* u, const Scalar* v, Scalar alpha) + { + general_rank1_update::run(rows,cols,mat,stride,u,v,alpha); } }; diff --git a/blas/PackedSelfadjointProduct.h b/blas/PackedSelfadjointProduct.h new file mode 100644 index 000000000..adc86ece1 --- /dev/null +++ b/blas/PackedSelfadjointProduct.h @@ -0,0 +1,59 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Chen-Pang He +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELFADJOINT_PACKED_PRODUCT_H +#define EIGEN_SELFADJOINT_PACKED_PRODUCT_H + +namespace internal { + +/* Optimized matrix += alpha * uv' + * The matrix is in packed form. + * + * FIXME I always fail tests for complex self-adjoint matrices. + * + ******* FATAL ERROR - PARAMETER NUMBER 6 WAS CHANGED INCORRECTLY ******* + ******* xHPR FAILED ON CALL NUMBER: + 2: xHPR ('U', 1, 0.0, X, 1, AP) + */ +template +struct selfadjoint_packed_rank1_update; + +template +struct selfadjoint_packed_rank1_update +{ + static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) + { + internal::conj_if cj; + typedef Map > OtherMap; + typedef typename internal::conditional::type ConjRhsType; + Index offset = 0; + + for (Index i=0; i >(mat+offset, UpLo==Lower ? size-i : (i+1)) + += alpha * cj(vec[i]) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1))); + //FIXME This should be handled outside. + mat[offset+(UpLo==Lower ? 0 : i)] = real(mat[offset+(UpLo==Lower ? 0 : i)]); + offset += UpLo==Lower ? size-i : (i+1); + } + } +}; + +template +struct selfadjoint_packed_rank1_update +{ + static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) + { + selfadjoint_packed_rank1_update::run(size,mat,vec,alpha); + } +}; + +} // end namespace internal + +#endif // EIGEN_SELFADJOINT_PACKED_PRODUCT_H diff --git a/blas/SelfadjointPackedProduct.h b/blas/SelfadjointPackedProduct.h deleted file mode 100644 index 4ea36b569..000000000 --- a/blas/SelfadjointPackedProduct.h +++ /dev/null @@ -1,47 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Chen-Pang He -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SELFADJOINT_PACKED_PRODUCT_H -#define EIGEN_SELFADJOINT_PACKED_PRODUCT_H - -namespace internal { - -/* Optimized matrix += alpha * uv' - * The matrix is in packed form. - * - * FIXME I always fail tests for complex self-adjoint matrices. - * - ******* FATAL ERROR - PARAMETER NUMBER 6 WAS CHANGED INCORRECTLY ******* - ******* xHPR FAILED ON CALL NUMBER: - 2: xHPR ('U', 1, 0.0, X, 1, AP) - */ -template -struct selfadjoint_packed_rank1_update -{ - static void run(Index size, Scalar* mat, const Scalar* vec, Scalar alpha) - { - typedef Map > OtherMap; - Index offset = 0; - - for (Index i=0; i >(mat+offset, UpLo==Lower ? size-i : (i+1)) - += alpha * conj(vec[i]) * OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)); - //FIXME This should be handled outside. - mat[offset+(UpLo==Lower ? 0 : i)] = real(mat[offset+(UpLo==Lower ? 0 : i)]); - offset += UpLo==Lower ? size-i : (i+1); - } - } -}; - -//TODO struct selfadjoint_packed_product_selector - -} // end namespace internal - -#endif // EIGEN_SELFADJOINT_PACKED_PRODUCT_H diff --git a/blas/common.h b/blas/common.h index a14d32289..3160d3b41 100644 --- a/blas/common.h +++ b/blas/common.h @@ -75,8 +75,8 @@ inline bool check_uplo(const char* uplo) namespace Eigen { #include "BandTriangularSolver.h" #include "GeneralRank1Update.h" +#include "PackedSelfadjointProduct.h" #include "Rank2Update.h" -#include "SelfadjointPackedProduct.h" } using namespace Eigen; diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 46bddc134..11ee13b4c 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -309,7 +309,7 @@ int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, in Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; @@ -346,7 +346,7 @@ int EIGEN_BLAS_FUNC(gerc)(int *m, int *n, RealScalar *palpha, RealScalar *px, in Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index 735545e2b..38b0dadb6 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -242,8 +242,8 @@ int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *in for(int k=0; k<2; ++k) func[k] = 0; - func[UP] = (internal::selfadjoint_packed_rank1_update::run); - func[LO] = (internal::selfadjoint_packed_rank1_update::run); + func[UP] = (internal::selfadjoint_packed_rank1_update::run); + func[LO] = (internal::selfadjoint_packed_rank1_update::run); init = true; } @@ -359,7 +359,7 @@ int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar* x_cpy = get_compact_vector(x,*m,*incx); Scalar* y_cpy = get_compact_vector(y,*n,*incy); - internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); + internal::general_rank1_update::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha); if(x_cpy!=x) delete[] x_cpy; if(y_cpy!=y) delete[] y_cpy; -- cgit v1.2.3