// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H #define EIGEN_GENERAL_MATRIX_MATRIX_H template struct ei_L2_block_traits { enum {width = 8 * ei_meta_sqrt::ret }; }; #ifndef EIGEN_EXTERN_INSTANTIATIONS template static void ei_cache_friendly_product( int _rows, int _cols, int depth, bool _lhsRowMajor, const Scalar* _lhs, int _lhsStride, bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride, bool resRowMajor, Scalar* res, int resStride) { const Scalar* EIGEN_RESTRICT lhs; const Scalar* EIGEN_RESTRICT rhs; int lhsStride, rhsStride, rows, cols; bool lhsRowMajor; if (resRowMajor) { lhs = _rhs; rhs = _lhs; lhsStride = _rhsStride; rhsStride = _lhsStride; cols = _rows; rows = _cols; lhsRowMajor = !_rhsRowMajor; ei_assert(_lhsRowMajor); } else { lhs = _lhs; rhs = _rhs; lhsStride = _lhsStride; rhsStride = _rhsStride; rows = _rows; cols = _cols; lhsRowMajor = _lhsRowMajor; ei_assert(!_rhsRowMajor); } typedef typename ei_packet_traits::type PacketType; #ifndef EIGEN_USE_ALT_PRODUCT enum { PacketSize = sizeof(PacketType)/sizeof(Scalar), #if (defined __i386__) HalfRegisterCount = 4, #else HalfRegisterCount = 8, #endif // register block size along the N direction nr = HalfRegisterCount/2, // register block size along the M direction mr = 2 * PacketSize, // max cache block size along the K direction Max_kc = ei_L2_block_traits::width, // max cache block size along the M direction Max_mc = 2*Max_kc }; int kc = std::min(Max_kc,depth); // cache block size along the K direction int mc = std::min(Max_mc,rows); // cache block size along the M direction Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); Scalar* blockB = ei_aligned_stack_new(Scalar, kc*cols*PacketSize); // number of columns which can be processed by packet of nr columns int packet_cols = (cols/nr)*nr; // GEMM_VAR1 for(int k2=0; k2 GEPP_VAR1 for(int i2=0; i2::width }; const bool resIsAligned = (PacketSize==1) || (((resStride%PacketSize) == 0) && (size_t(res)%16==0)); const int remainingSize = depth % PacketSize; const int size = 
depth - remainingSize; // third dimension of the product clamped to packet boundaries const int l2BlockRows = MaxL2BlockSize > rows ? rows : 512; const int l2BlockCols = MaxL2BlockSize > cols ? cols : 128; const int l2BlockSize = MaxL2BlockSize > size ? size : 256; const int l2BlockSizeAligned = (1 + std::max(l2BlockSize,l2BlockCols)/PacketSize)*PacketSize; const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0)); Scalar* EIGEN_RESTRICT block = new Scalar[l2BlockRows*size]; // for(int i=0; i