// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2008-2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see . #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H /* Optimized selfadjoint matrix * vector product: * This algorithm processes 2 columns at onces that allows to both reduce * the number of load/stores of the result by a factor 2 and to reduce * the instruction dependency. */ template static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector( int size, const Scalar* lhs, int lhsStride, const Scalar* _rhs, int rhsIncr, Scalar* res, Scalar alpha) { typedef typename ei_packet_traits::type Packet; const int PacketSize = sizeof(Packet)/sizeof(Scalar); enum { IsRowMajor = StorageOrder==RowMajor ? 1 : 0, IsLower = UpLo == Lower ? 1 : 0, FirstTriangular = IsRowMajor == IsLower }; ei_conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; ei_conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; Scalar cjAlpha = ConjugateRhs ? ei_conj(alpha) : alpha; // if the rhs is not sequentially stored in memory we copy it to a temporary buffer, // this is because we need to extract packets const Scalar* EIGEN_RESTRICT rhs = _rhs; if (rhsIncr!=1) { Scalar* r = ei_aligned_stack_new(Scalar, size); const Scalar* it = _rhs; for (int i=0; i huge speed up) // gcc 4.2 does this optimization automatically. const Scalar* EIGEN_RESTRICT a0It = A0 + alignedStart; const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart; const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart; Scalar* EIGEN_RESTRICT resIt = res + alignedStart; for (size_t i=alignedStart; i(rhs), size); } /*************************************************************************** * Wrapper to ei_product_selfadjoint_vector ***************************************************************************/ template struct ei_traits > : ei_traits, Lhs, Rhs> > {}; template struct SelfadjointProductMatrix : public ProductBase, Lhs, Rhs > { EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) enum { LhsUpLo = LhsMode&(Upper|Lower) }; SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} template void scaleAndAddTo(Dest& dst, Scalar alpha) const { ei_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); ei_assert(dst.innerStride()==1 && "not implemented yet"); ei_product_selfadjoint_vector::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)> ( lhs.rows(), // size &lhs.coeff(0,0), lhs.innerStride(), // lhs info &rhs.coeff(0), rhs.innerStride(), // rhs info &dst.coeffRef(0), // result info actualAlpha // scale factor ); } }; #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H