// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2008-2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see . #ifndef EIGEN_GENERAL_BLOCK_PANEL_H #define EIGEN_GENERAL_BLOCK_PANEL_H #ifndef EIGEN_EXTERN_INSTANTIATIONS #ifdef EIGEN_HAS_FUSE_CJMADD #define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C); #else #define CJMADD(A,B,C,T) T = B; T = cj.pmul(A,T); C = ei_padd(C,T); // #define CJMADD(A,B,C,T) T = A; T = cj.pmul(T,B); C = ei_padd(C,T); #endif // optimized GEneral packed Block * packed Panel product kernel template struct ei_gebp_kernel { void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index rows, Index depth, Index cols, Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, Scalar* unpackedB = 0) { typedef typename ei_packet_traits::type PacketType; enum { PacketSize = ei_packet_traits::size }; if(strideA==-1) strideA = depth; if(strideB==-1) strideB = depth; Conj cj; Index packet_cols = (cols/nr) * nr; const Index peeled_mc = (rows/mr)*mr; const Index peeled_mc2 = peeled_mc + (rows-peeled_mc >= PacketSize ? PacketSize : 0); const Index peeled_kc = (depth/4)*4; if(unpackedB==0) unpackedB = const_cast(blockB - strideB * nr * PacketSize); // loops on each micro vertical panel of rhs (depth x nr) for(Index j2=0; j2 we select a mr x nr micro block of res which is entirely // stored into mr/packet_size x nr registers. for(Index i=0; i=PacketSize) { Index i = peeled_mc; const Scalar* blA = &blockA[i*strideA+offsetA*PacketSize]; ei_prefetch(&blA[0]); // gets res block as register PacketType C0, C1, C2, C3; C0 = ei_ploadu(&res[(j2+0)*resStride + i]); C1 = ei_ploadu(&res[(j2+1)*resStride + i]); if(nr==4) C2 = ei_ploadu(&res[(j2+2)*resStride + i]); if(nr==4) C3 = ei_ploadu(&res[(j2+3)*resStride + i]); // performs "inner" product const Scalar* blB = unpackedB; for(Index k=0; k do the same but with nr==1 for(Index j2=packet_cols; j2=PacketSize) { Index i = peeled_mc; const Scalar* blA = &blockA[i*strideA+offsetA*PacketSize]; ei_prefetch(&blA[0]); PacketType C0 = ei_ploadu(&res[(j2+0)*resStride + i]); const Scalar* blB = unpackedB; for(Index k=0; k struct ei_gemm_pack_lhs { void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0) { enum { PacketSize = ei_packet_traits::size }; ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); ei_conj_if::IsComplex && Conjugate> cj; ei_const_blas_data_mapper lhs(_lhs,lhsStride); Index count = 0; Index peeled_mc = (rows/mr)*mr; for(Index i=0; i=PacketSize) { if(PanelMode) count += PacketSize*offset; for(Index k=0; k struct ei_gemm_pack_rhs { typedef typename ei_packet_traits::type Packet; enum { PacketSize = ei_packet_traits::size }; void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Scalar alpha, Index depth, Index cols, Index stride=0, Index offset=0) { ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); bool hasAlpha = alpha != Scalar(1); Index packet_cols = (cols/nr) * nr; Index count = 0; for(Index j2=0; j2 struct ei_gemm_pack_rhs { enum { PacketSize = ei_packet_traits::size }; void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Scalar alpha, Index depth, Index cols, Index stride=0, Index offset=0) { ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); bool hasAlpha = alpha != Scalar(1); Index packet_cols = (cols/nr) * nr; Index count = 0; for(Index j2=0; j2