// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2008-2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see . #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H /* Optimized selfadjoint matrix * vector product: * This algorithm processes 2 columns at onces that allows to both reduce * the number of load/stores of the result by a factor 2 and to reduce * the instruction dependency. */ template static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector( int size, const Scalar* lhs, int lhsStride, const Scalar* _rhs, int rhsIncr, Scalar* res, Scalar alpha) { typedef typename ei_packet_traits::type Packet; const int PacketSize = sizeof(Packet)/sizeof(Scalar); enum { IsRowMajor = StorageOrder==RowMajorBit ? 1 : 0, IsLower = UpLo == LowerTriangularBit ? 1 : 0, FirstTriangular = IsRowMajor == IsLower }; ei_conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; ei_conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; Scalar cjAlpha = ConjugateRhs ? ei_conj(alpha) : alpha; // if the rhs is not sequentially stored in memory we copy it to a temporary buffer, // this is because we need to extract packets const Scalar* EIGEN_RESTRICT rhs = _rhs; if (rhsIncr!=1) { Scalar* r = ei_aligned_stack_new(Scalar, size); const Scalar* it = _rhs; for (int i=0; i huge speed up) // gcc 4.2 does this optimization automatically. const Scalar* EIGEN_RESTRICT a0It = A0 + alignedStart; const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart; const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart; Scalar* EIGEN_RESTRICT resIt = res + alignedStart; for (size_t i=alignedStart; i(rhs), size); } #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H