// NOTE(review): this copy of the header has lost its line breaks. Everything after the
// first "//" on a physical line is lexically a comment, so the first physical line below is
// entirely a comment and the second one is a comment from "// loops on each micro ..." on.
// NOTE(review): template parameter/argument lists are absent ("template struct ...",
// "ei_packet_traits::type", "const_cast(blockB ...") and several for-loop headers/bodies are
// cut off ("for(int j2=0; j2 we select ..."). Presumably text between '<' and '>' was
// stripped during extraction -- TODO restore from the upstream Eigen sources before building.
//
// Overview of what is still visible in this file:
//  - include guard EIGEN_GENERAL_BLOCK_PANEL_H; the content is wrapped in
//    "#ifndef EIGEN_EXTERN_INSTANTIATIONS".
//  - CJMADD(A,B,C,T): with EIGEN_HAS_FUSE_CJMADD it expands to C = cj.pmadd(A,B,C);
//    otherwise to T = B; T = cj.pmul(A,T); C = ei_padd(C,T); (T is a scratch operand).
//  - ei_gebp_kernel::operator()(res, resStride, blockA, blockB, rows, depth, cols,
//    strideA=-1, strideB=-1, offsetA=0, offsetB=0, unpackedB=0):
//      * strideA / strideB equal to -1 are replaced by depth;
//      * packet_cols = (cols/nr)*nr, peeled_mc = (rows/mr)*mr, peeled_kc = (depth/4)*4;
//        peeled_mc2 adds PacketSize to peeled_mc when at least PacketSize rows remain;
//      * a null unpackedB is replaced by blockB - strideB * nr * PacketSize;
//      * columns in [packet_cols, cols) are handled by a second loop ("nr==1" case).
//  - ei_gemm_pack_lhs::operator()(blockA, _lhs, lhsStride, depth, rows, stride=0, offset=0)
//  - ei_gemm_pack_rhs::operator()(blockB, rhs, rhsStride, alpha, depth, cols, stride=0,
//    offset=0), defined twice (the distinguishing template arguments are not visible);
//    hasAlpha is true when alpha != Scalar(1).
//  - All three pack routines assert: without PanelMode, stride==0 and offset==0;
//    with PanelMode, stride>=depth and offset<=stride.
// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2008-2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see <http://www.gnu.org/licenses/>. #ifndef EIGEN_GENERAL_BLOCK_PANEL_H #define EIGEN_GENERAL_BLOCK_PANEL_H #ifndef EIGEN_EXTERN_INSTANTIATIONS #ifdef EIGEN_HAS_FUSE_CJMADD #define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C); #else #define CJMADD(A,B,C,T) T = B; T = cj.pmul(A,T); C = ei_padd(C,T); #endif // optimized GEneral packed Block * packed Panel product kernel template struct ei_gebp_kernel { void operator()(Scalar* res, int resStride, const Scalar* blockA, const Scalar* blockB, int rows, int depth, int cols, int strideA=-1, int strideB=-1, int offsetA=0, int offsetB=0, Scalar* unpackedB = 0) { typedef typename ei_packet_traits::type PacketType; enum { PacketSize = ei_packet_traits::size }; if(strideA==-1) strideA = depth; if(strideB==-1) strideB = depth; Conj cj; int packet_cols = (cols/nr) * nr; const int peeled_mc = (rows/mr)*mr; const int peeled_mc2 = peeled_mc + (rows-peeled_mc >= PacketSize ? 
// NOTE(review): the next physical line continues the peeled_mc2 expression of
// ei_gebp_kernel::operator() begun above; it also holds the remainder of the kernel,
// ei_gemm_pack_lhs, and the start of the first ei_gemm_pack_rhs.
PacketSize : 0); const int peeled_kc = (depth/4)*4; if(unpackedB==0) unpackedB = const_cast(blockB - strideB * nr * PacketSize); // loops on each micro vertical panel of rhs (depth x nr) for(int j2=0; j2 we select a mr x nr micro block of res which is entirely // stored into mr/packet_size x nr registers. for(int i=0; i=PacketSize) { int i = peeled_mc; const Scalar* blA = &blockA[i*strideA+offsetA*PacketSize]; #ifdef EIGEN_VECTORIZE_SSE _mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0); #endif // gets res block as register PacketType C0, C1, C2, C3; C0 = ei_ploadu(&res[(j2+0)*resStride + i]); C1 = ei_ploadu(&res[(j2+1)*resStride + i]); if(nr==4) C2 = ei_ploadu(&res[(j2+2)*resStride + i]); if(nr==4) C3 = ei_ploadu(&res[(j2+3)*resStride + i]); // performs "inner" product const Scalar* blB = unpackedB; for(int k=0; k do the same but with nr==1 for(int j2=packet_cols; j2=PacketSize) { int i = peeled_mc; const Scalar* blA = &blockA[i*strideA+offsetA*PacketSize]; #ifdef EIGEN_VECTORIZE_SSE _mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0); #endif PacketType C0 = ei_ploadu(&res[(j2+0)*resStride + i]); const Scalar* blB = unpackedB; for(int k=0; k struct ei_gemm_pack_lhs { void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, int lhsStride, int depth, int rows, int stride=0, int offset=0) { enum { PacketSize = ei_packet_traits::size }; ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); ei_conj_if::IsComplex && Conjugate> cj; ei_const_blas_data_mapper lhs(_lhs,lhsStride); int count = 0; int peeled_mc = (rows/mr)*mr; for(int i=0; i=PacketSize) { if(PanelMode) count += PacketSize*offset; for(int k=0; k struct ei_gemm_pack_rhs { typedef typename ei_packet_traits::type Packet; enum { PacketSize = ei_packet_traits::size }; void operator()(Scalar* blockB, const Scalar* rhs, int rhsStride, Scalar alpha, int depth, int cols, int stride=0, int offset=0) { ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode 
// NOTE(review): the next physical line continues the ei_assert of the first
// ei_gemm_pack_rhs::operator() and then starts a second ei_gemm_pack_rhs definition,
// which is cut off at the end of the visible text (loop header "for(int j2=0; j2").
&& stride>=depth && offset<=stride)); bool hasAlpha = alpha != Scalar(1); int packet_cols = (cols/nr) * nr; int count = 0; for(int j2=0; j2 struct ei_gemm_pack_rhs { enum { PacketSize = ei_packet_traits::size }; void operator()(Scalar* blockB, const Scalar* rhs, int rhsStride, Scalar alpha, int depth, int cols, int stride=0, int offset=0) { ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); bool hasAlpha = alpha != Scalar(1); int packet_cols = (cols/nr) * nr; int count = 0; for(int j2=0; j2