From e4e7585a24a4ef08742b4c198ab6e37e93eececf Mon Sep 17 00:00:00 2001 From: Chen-Pang He Date: Sat, 8 Sep 2012 17:29:44 +0800 Subject: Implement rank-2 update for packed matrices. --- blas/Rank2Update.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 6 deletions(-) (limited to 'blas/Rank2Update.h') diff --git a/blas/Rank2Update.h b/blas/Rank2Update.h index e7a5eeaba..0cf3a1961 100644 --- a/blas/Rank2Update.h +++ b/blas/Rank2Update.h @@ -28,9 +28,8 @@ struct rank2_update_selector for (Index i=0; i(mat+stride*i, i+1) += - conj(alpha) * conj(_u[i]) * v.head(i+1) - + alpha * conj(_v[i]) * u.head(i+1); + Map(mat+stride*i, i+1) += conj(alpha) * conj(_u[i]) * v.head(i+1) + + alpha * conj(_v[i]) * u.head(i+1); } } }; @@ -45,9 +44,52 @@ struct rank2_update_selector for (Index i=0; i(mat+(stride+1)*i, size-i) += - conj(alpha) * conj(_u[i]) * v.tail(size-i) - + alpha * conj(_v[i]) * u.tail(size-i); + Map(mat+(stride+1)*i, size-i) += conj(alpha) * conj(_u[i]) * v.tail(size-i) + + alpha * conj(_v[i]) * u.tail(size-i); + } + } +}; + +/* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu' + * The matrix is in packed form. + */ +template +struct packed_rank2_update_selector; + +template +struct packed_rank2_update_selector +{ + static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha) + { + typedef Matrix PlainVector; + Map u(_u, size), v(_v, size); + Index offset = 0; + + for (Index i=0; i(mat+offset, i+1) += conj(alpha) * conj(_u[i]) * v.head(i+1) + + alpha * conj(_v[i]) * u.head(i+1); + mat[offset+i] = real(mat[offset+i]); + } + } +}; + +template +struct packed_rank2_update_selector +{ + static void run(Index size, Scalar* mat, const Scalar* _u, const Scalar* _v, Scalar alpha) + { + typedef Matrix PlainVector; + Map u(_u, size), v(_v, size); + Index offset = 0; + + for (Index i=0; i(mat+offset, size-i) += conj(alpha) * conj(_u[i]) * v.tail(size-i) + + alpha * conj(_v[i]) * u.tail(size-i); + mat[offset] = real(mat[offset]); + offset += size-i; } } }; -- cgit v1.2.3