// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see . #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H // template // struct ei_gemm_pack_lhs_triangular // { // Matrix::IsComplex && Conjugate> cj; // ei_const_blas_data_mapper lhs(_lhs,lhsStride); // int count = 0; // const int peeled_mc = (rows/mr)*mr; // for(int i=0; i struct ei_product_triangular_matrix_matrix; template struct ei_product_triangular_matrix_matrix { static EIGEN_STRONG_INLINE void run( int size, int otherSize, const Scalar* lhs, int lhsStride, const Scalar* rhs, int rhsStride, Scalar* res, int resStride, Scalar alpha) { ei_product_triangular_matrix_matrix ::run(size, otherSize, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); } }; // implements col-major += alpha * op(triangular) * op(general) template struct ei_product_triangular_matrix_matrix { static EIGEN_DONT_INLINE void run( int size, int cols, const Scalar* _lhs, int lhsStride, const Scalar* _rhs, int rhsStride, Scalar* res, int resStride, Scalar alpha) { int rows = size; ei_const_blas_data_mapper lhs(_lhs,lhsStride); ei_const_blas_data_mapper rhs(_rhs,rhsStride); if (ConjugateRhs) alpha = ei_conj(alpha); typedef ei_product_blocking_traits Blocking; enum { SmallPanelWidth = EIGEN_ENUM_MAX(Blocking::mr,Blocking::nr), IsLower = (Mode&Lower) == Lower }; int kc = std::min(Blocking::Max_kc/4,size); // cache block size along the K direction int mc = std::min(Blocking::Max_mc,rows); // cache block size along the M direction Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); // Scalar* allocatedBlockB = new Scalar[sizeB]; Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr; Matrix triangularBuffer; triangularBuffer.setZero(); triangularBuffer.diagonal().setOnes(); ei_gebp_kernel > gebp_kernel; ei_gemm_pack_lhs pack_lhs; ei_gemm_pack_rhs pack_rhs; for(int k2=IsLower ? size : 0; IsLower ? k2>0 : k2 skip it // 2 - the diagonal block => special kernel // 3 - the panel below the diagonal block => GEPP // the block diagonal { // for each small vertical panels of lhs for (int k1=0; k1(actual_kc-k1, SmallPanelWidth); int lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1; int startBlock = actual_k2+k1; int blockBOffset = k1; // => GEBP with the micro triangular block // The trick is to pack this micro block while filling the opposite triangular part with zeros. // To this end we do an extra triangular copy to a small temporary buffer for (int k=0;k0) { int startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2; pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget); gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, actualPanelWidth, actual_kc, 0, blockBOffset); } } } // the part below the diagonal => GEPP { int start = IsLower ? k2 : 0; int end = IsLower ? size : actual_k2; for(int i2=start; i2() (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); } } } ei_aligned_stack_delete(Scalar, blockA, kc*mc); ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB); // delete[] allocatedBlockB; } }; // implements col-major += alpha * op(general) * op(triangular) template struct ei_product_triangular_matrix_matrix { static EIGEN_DONT_INLINE void run( int size, int rows, const Scalar* _lhs, int lhsStride, const Scalar* _rhs, int rhsStride, Scalar* res, int resStride, Scalar alpha) { int cols = size; ei_const_blas_data_mapper lhs(_lhs,lhsStride); ei_const_blas_data_mapper rhs(_rhs,rhsStride); if (ConjugateRhs) alpha = ei_conj(alpha); typedef ei_product_blocking_traits Blocking; enum { SmallPanelWidth = EIGEN_ENUM_MAX(Blocking::mr,Blocking::nr), IsLower = (Mode&Lower) == Lower }; int kc = std::min(Blocking::Max_kc/4,size); // cache block size along the K direction int mc = std::min(Blocking::Max_mc,rows); // cache block size along the M direction Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar,sizeB); Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr; Matrix triangularBuffer; triangularBuffer.setZero(); triangularBuffer.diagonal().setOnes(); ei_gebp_kernel > gebp_kernel; ei_gemm_pack_lhs pack_lhs; ei_gemm_pack_rhs pack_rhs; ei_gemm_pack_rhs pack_rhs_panel; for(int k2=IsLower ? 0 : size; IsLower ? k20; IsLower ? k2+=kc : k2-=kc) { const int actual_kc = std::min(IsLower ? size-k2 : k2, kc); int actual_k2 = IsLower ? k2 : k2-actual_kc; int rs = IsLower ? actual_k2 : size - k2; Scalar* geb = blockB+actual_kc*actual_kc; pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, alpha, actual_kc, rs); // pack the triangular part of the rhs padding the unrolled blocks with zeros { for (int j2=0; j2(actual_kc-j2, SmallPanelWidth); int actual_j2 = actual_k2 + j2; int panelOffset = IsLower ? j2+actualPanelWidth : 0; int panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2; // general part pack_rhs_panel(blockB+j2*actual_kc, &rhs(actual_k2+panelOffset, actual_j2), rhsStride, alpha, panelLength, actualPanelWidth, actual_kc, panelOffset); // append the triangular part via a temporary buffer for (int j=0;j(actual_kc-j2, SmallPanelWidth); int panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth; int blockOffset = IsLower ? j2 : 0; gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride, blockA, blockB+j2*actual_kc, actual_mc, panelLength, actualPanelWidth, actual_kc, actual_kc, // strides blockOffset, blockOffset,// offsets allocatedBlockB); // workspace } } gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride, blockA, geb, actual_mc, actual_kc, rs, -1, -1, 0, 0, allocatedBlockB); } } ei_aligned_stack_delete(Scalar, blockA, kc*mc); ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB); } }; /*************************************************************************** * Wrapper to ei_product_triangular_matrix_matrix ***************************************************************************/ template struct ei_traits > : ei_traits, Lhs, Rhs> > {}; template struct TriangularProduct : public ProductBase, Lhs, Rhs > { EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} template void scaleAndAddTo(Dest& dst, Scalar alpha) const { const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); ei_product_triangular_matrix_matrix::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, (ei_traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, (ei_traits::Flags&RowMajorBit) ? RowMajor : ColMajor> ::run( lhs.rows(), LhsIsTriangular ? rhs.cols() : lhs.rows(), // sizes &lhs.coeff(0,0), lhs.outerStride(), // lhs info &rhs.coeff(0,0), rhs.outerStride(), // rhs info &dst.coeffRef(0,0), dst.outerStride(), // result info actualAlpha // alpha ); } }; #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H