// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.

#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

// pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, int mr, int StorageOrder>
struct ei_symm_pack_lhs
{
  void operator()(Scalar* blockA, const Scalar* _lhs, int lhsStride, int actual_kc, int actual_mc)
  {
    ei_const_blas_data_mapper<Scalar,StorageOrder> lhs(_lhs,lhsStride);
    int count = 0;
    const int peeled_mc = (actual_mc/mr)*mr;
    for(int i=0; i<peeled_mc; i+=mr)
    {
      // normal copy of the columns on the left of the diagonal sub block
      for(int k=0; k<i; k++)
        for(int w=0; w<mr; w++)
          blockA[count++] = lhs(i+w,k);           // normal
      // the diagonal sub block itself: coefficients above the diagonal
      // are read from their mirrored position
      int h = 0;
      for(int k=i; k<i+mr; k++)
      {
        for(int w=0; w<h; w++)
          blockA[count++] = lhs(k,i+w);           // transposed
        for(int w=h; w<mr; w++)
          blockA[count++] = lhs(i+w,k);           // normal
        ++h;
      }
      // transposed copy of the columns on the right of the diagonal sub block
      for(int k=i+mr; k<actual_kc; k++)
        for(int w=0; w<mr; w++)
          blockA[count++] = lhs(k,i+w);           // transposed
    }
    // the same for the remaining rows, one row at a time (mr==1)
    for(int i=peeled_mc; i<actual_mc; i++)
    {
      for(int k=0; k<=i; k++)
        blockA[count++] = lhs(i,k);               // normal
      for(int k=i+1; k<actual_kc; k++)
        blockA[count++] = lhs(k,i);               // transposed
    }
  }
};

// pack a panel of a selfadjoint rhs, scaling each coefficient by alpha and
// expanding it into a constant packet for the gebp_kernel
template<typename Scalar, int nr, int StorageOrder>
struct ei_symm_pack_rhs
{
  enum { PacketSize = ei_packet_traits<Scalar>::size };
  void operator()(Scalar* blockB, const Scalar* _rhs, int rhsStride, Scalar alpha, int actual_kc, int packet_cols, int cols, int k2)
  {
    int end_k = k2 + actual_kc;
    int count = 0;
    ei_const_blas_data_mapper<Scalar,StorageOrder> rhs(_rhs,rhsStride);

    // first part: normal case (columns before the diagonal block)
    for(int j2=0; j2<k2; j2+=nr)
    {
      for(int k=k2; k<end_k; k++)
      {
        for(int w=0; w<nr; w++)
          ei_pstore(&blockB[count+w*PacketSize], ei_pset1(alpha*rhs(k,j2+w)));
        count += nr*PacketSize;
      }
    }

    // second part: the diagonal block, mixing normal and transposed accesses
    for(int j2=k2; j2<std::min(end_k,packet_cols); j2+=nr)
    {
      for(int k=k2; k<end_k; k++)
      {
        for(int w=0; w<nr; w++)
          ei_pstore(&blockB[count+w*PacketSize],
                    ei_pset1(alpha * (k<j2+w ? rhs(j2+w,k) : rhs(k,j2+w))));
        count += nr*PacketSize;
      }
    }

    // third part: transposed case (columns after the diagonal block)
    for(int j2=end_k; j2<packet_cols; j2+=nr)
    {
      for(int k=k2; k<end_k; k++)
      {
        for(int w=0; w<nr; w++)
          ei_pstore(&blockB[count+w*PacketSize], ei_pset1(alpha*rhs(j2+w,k)));
        count += nr*PacketSize;
      }
    }

    // copy the remaining columns one at a time (=> the same with nr==1)
    for(int j2=packet_cols; j2<cols; ++j2)
    {
      // transposed copy above the diagonal
      int half = std::min(end_k,j2);
      for(int k=k2; k<half; k++)
      {
        ei_pstore(&blockB[count], ei_pset1(alpha*rhs(j2,k)));
        count += PacketSize;
      }
      // normal copy below the diagonal
      for(int k=half; k<end_k; k++)
      {
        ei_pstore(&blockB[count], ei_pset1(alpha*rhs(k,j2)));
        count += PacketSize;
      }
    }
  }
};

// optimized selfadjoint matrix * matrix (SYMM) products built on top of
// the general block panel kernel
template<typename Scalar,
         int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
         int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
         int ResStorageOrder>
struct ei_product_selfadjoint_matrix;

// a row-major destination is handled by transposing the whole product,
// i.e., by swapping the roles of the lhs and the rhs
template<typename Scalar,
         int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
         int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct ei_product_selfadjoint_matrix<Scalar,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs,
                                            RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{
  static EIGEN_STRONG_INLINE void run(
    int rows, int cols,
    const Scalar* lhs, int lhsStride,
    const Scalar* rhs, int rhsStride,
    Scalar* res,       int resStride,
    Scalar alpha)
  {
    ei_product_selfadjoint_matrix<Scalar,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ?
        ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor>
      ::run(cols, rows,  rhs, rhsStride,  lhs, lhsStride,  res, resStride,  alpha);
  }
};

// selfadjoint * matrix product
template<typename Scalar,
         int LhsStorageOrder, bool ConjugateLhs,
         int RhsStorageOrder, bool ConjugateRhs>
struct ei_product_selfadjoint_matrix<Scalar,LhsStorageOrder,true,ConjugateLhs,
                                            RhsStorageOrder,false,ConjugateRhs,ColMajor>
{
  static EIGEN_DONT_INLINE void run(
    int rows, int cols,
    const Scalar* _lhs, int lhsStride,
    const Scalar* _rhs, int rhsStride,
    Scalar* res,        int resStride,
    Scalar alpha)
  {
    int size = rows;

    ei_const_blas_data_mapper<Scalar, LhsStorageOrder> lhs(_lhs,lhsStride);
    ei_const_blas_data_mapper<Scalar, RhsStorageOrder> rhs(_rhs,rhsStride);

    if (ConjugateRhs)
      alpha = ei_conj(alpha);

    typedef ei_product_blocking_traits<Scalar> Blocking;

    int kc = std::min(Blocking::Max_kc,size);  // cache block size along the K direction
    int mc = std::min(Blocking::Max_mc,rows);  // cache block size along the M direction

    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    Scalar* blockB = ei_aligned_stack_new(Scalar, kc*cols*Blocking::PacketSize);

    // number of columns which can be processed by packets of nr columns
    int packet_cols = (cols/Blocking::nr)*Blocking::nr;

    ei_gebp_kernel<Scalar, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel;

    for(int k2=0; k2<size; k2+=kc)
    {
      const int actual_kc = std::min(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs;
      // pack rhs's panel into a sequential chunk of memory
      // and expand each coefficient to a constant packet for further reuse
      ei_gemm_pack_rhs<Scalar,Blocking::nr,RhsStorageOrder>()
        (blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, packet_cols, cols);

      // the selected lhs's panel has to be split in three different parts:
      //  1 - the transposed panel above the diagonal block => transposed packed copy
      //  2 - the diagonal block => special packed copy
      //  3 - the panel below the diagonal block => generic packed copy
      for(int i2=0; i2<k2; i2+=mc)
      {
        const int actual_mc = std::min(i2+mc,k2)-i2;
        // transposed packed copy
        ei_gemm_pack_lhs<Scalar,Blocking::mr,LhsStorageOrder==RowMajor?ColMajor:RowMajor>()
          (blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res, resStride, blockA, blockB, actual_mc, actual_kc, packet_cols, i2, cols);
      }
      // the block diagonal
      {
        const int actual_mc = std::min(k2+kc,size)-k2;
        // symmetric packed copy
        ei_symm_pack_lhs<Scalar,Blocking::mr,LhsStorageOrder>()
          (blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res, resStride, blockA, blockB, actual_mc, actual_kc, packet_cols, k2, cols);
      }
      // the panel below the diagonal block
      for(int i2=k2+kc; i2<size; i2+=mc)
      {
        const int actual_mc = std::min(i2+mc,size)-i2;
        // generic packed copy
        ei_gemm_pack_lhs<Scalar,Blocking::mr,LhsStorageOrder>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res, resStride, blockA, blockB, actual_mc, actual_kc, packet_cols, i2, cols);
      }
    }

    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
    ei_aligned_stack_delete(Scalar, blockB, kc*cols*Blocking::PacketSize);
  }
};

// matrix * selfadjoint product
template<typename Scalar,
         int LhsStorageOrder, bool ConjugateLhs,
         int RhsStorageOrder, bool ConjugateRhs>
struct ei_product_selfadjoint_matrix<Scalar,LhsStorageOrder,false,ConjugateLhs,
                                            RhsStorageOrder,true,ConjugateRhs,ColMajor>
{
  static EIGEN_DONT_INLINE void run(
    int rows, int cols,
    const Scalar* _lhs, int lhsStride,
    const Scalar* _rhs, int rhsStride,
    Scalar* res,        int resStride,
    Scalar alpha)
  {
    int size = cols;

    ei_const_blas_data_mapper<Scalar, LhsStorageOrder> lhs(_lhs,lhsStride);
    ei_const_blas_data_mapper<Scalar, RhsStorageOrder> rhs(_rhs,rhsStride);

    if (ConjugateRhs)
      alpha = ei_conj(alpha);

    typedef ei_product_blocking_traits<Scalar> Blocking;

    int kc = std::min(Blocking::Max_kc,size);  // cache block size along the K direction
    int mc = std::min(Blocking::Max_mc,rows);  // cache block size along the M direction

    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    Scalar* blockB = ei_aligned_stack_new(Scalar, kc*cols*Blocking::PacketSize);

    // number of columns which can be processed by packets of nr columns
    int packet_cols = (cols/Blocking::nr)*Blocking::nr;

    ei_gebp_kernel<Scalar, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> > gebp_kernel;

    for(int k2=0; k2<size; k2+=kc)
    {
      const int actual_kc = std::min(k2+kc,size)-k2;

      // symmetric packed copy of the rhs's panel
      ei_symm_pack_rhs<Scalar,Blocking::nr,RhsStorageOrder>()
        (blockB, _rhs, rhsStride, alpha, actual_kc, packet_cols, cols, k2);

      // => GEPP
      for(int i2=0; i2<rows; i2+=mc)
      {
        const int actual_mc = std::min(i2+mc,rows)-i2;
        ei_gemm_pack_lhs<Scalar,Blocking::mr,LhsStorageOrder>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res, resStride, blockA, blockB, actual_mc, actual_kc, packet_cols, i2, cols);
      }
    }

    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
    ei_aligned_stack_delete(Scalar, blockB, kc*cols*Blocking::PacketSize);
  }
};

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
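
/* Usage sketch, kept as a comment so it does not affect this header:
   it assumes the user-level selfadjointView<Lower>() entry point of the
   public Eigen API; the matrix sizes and variable names are illustrative
   only, not taken from this file.

     #include <Eigen/Core>
     using namespace Eigen;

     int main()
     {
       MatrixXf A = MatrixXf::Random(128,128);
       MatrixXf B = MatrixXf::Random(128,64);

       // selfadjoint lhs * general rhs: only A's Lower triangle is read,
       // which is the access pattern ei_symm_pack_lhs relies on.
       MatrixXf C = A.selfadjointView<Lower>() * B;

       // general lhs * selfadjoint rhs: the ei_symm_pack_rhs path.
       MatrixXf D = B.transpose() * A.selfadjointView<Lower>();
       return 0;
     }
*/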