// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see . #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H // template // struct ei_gemm_pack_lhs_triangular // { // Matrix::IsComplex && Conjugate> cj; // ei_const_blas_data_mapper lhs(_lhs,lhsStride); // int count = 0; // const int peeled_mc = (rows/mr)*mr; // for(int i=0; i struct ei_product_triangular_matrix_matrix; template struct ei_product_triangular_matrix_matrix { static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { ei_product_triangular_matrix_matrix ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); } }; // implements col-major += alpha * op(triangular) * op(general) template struct ei_product_triangular_matrix_matrix { static EIGEN_DONT_INLINE void run( Index rows, Index cols, Index depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { ei_const_blas_data_mapper lhs(_lhs,lhsStride); ei_const_blas_data_mapper rhs(_rhs,rhsStride); typedef ei_gebp_traits Traits; enum { SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), IsLower = (Mode&Lower) == Lower, SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1 }; Index kc = depth; // cache block size along the K direction Index mc = rows; // cache block size along the M direction Index nc = cols; // cache block size along the N direction computeProductBlockingSizes(kc, mc, nc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeW = kc*Traits::WorkSpaceFactor; std::size_t sizeB = sizeW + kc*cols; Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); Scalar* blockB = allocatedBlockB + sizeW; Matrix triangularBuffer; triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); else triangularBuffer.diagonal().setOnes(); ei_gebp_kernel gebp_kernel; ei_gemm_pack_lhs pack_lhs; ei_gemm_pack_rhs pack_rhs; for(Index k2=IsLower ? depth : 0; IsLower ? k2>0 : k2rows)) { actual_kc = rows-k2; k2 = k2+actual_kc-kc; } pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols); // the selected lhs's panel has to be split in three different parts: // 1 - the part which is above the diagonal block => skip it // 2 - the diagonal block => special kernel // 3 - the panel below the diagonal block => GEPP // the block diagonal, if any if(IsLower || actual_k2(actual_kc-k1, SmallPanelWidth); Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1; Index startBlock = actual_k2+k1; Index blockBOffset = k1; // => GEBP with the micro triangular block // The trick is to pack this micro block while filling the opposite triangular part with zeros. // To this end we do an extra triangular copy to a small temporary buffer for (Index k=0;k0) { Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2; pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget); gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha, actualPanelWidth, actual_kc, 0, blockBOffset); } } } // the part below the diagonal => GEPP { Index start = IsLower ? k2 : 0; Index end = IsLower ? rows : std::min(actual_k2,rows); for(Index i2=start; i2() (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); } } } ei_aligned_stack_delete(Scalar, blockA, kc*mc); ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB); // delete[] allocatedBlockB; } }; // implements col-major += alpha * op(general) * op(triangular) template struct ei_product_triangular_matrix_matrix { static EIGEN_DONT_INLINE void run( Index rows, Index cols, Index depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { ei_const_blas_data_mapper lhs(_lhs,lhsStride); ei_const_blas_data_mapper rhs(_rhs,rhsStride); typedef ei_gebp_traits Traits; enum { SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), IsLower = (Mode&Lower) == Lower, SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1 }; Index kc = depth; // cache block size along the K direction Index mc = rows; // cache block size along the M direction Index nc = cols; // cache block size along the N direction computeProductBlockingSizes(kc, mc, nc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeW = kc*Traits::WorkSpaceFactor; std::size_t sizeB = sizeW + kc*cols; Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar,sizeB); Scalar* blockB = allocatedBlockB + sizeW; Matrix triangularBuffer; triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); else triangularBuffer.diagonal().setOnes(); ei_gebp_kernel gebp_kernel; ei_gemm_pack_lhs pack_lhs; ei_gemm_pack_rhs pack_rhs; ei_gemm_pack_rhs pack_rhs_panel; for(Index k2=IsLower ? 0 : depth; IsLower ? k20; IsLower ? k2+=kc : k2-=kc) { Index actual_kc = std::min(IsLower ? depth-k2 : k2, kc); Index actual_k2 = IsLower ? k2 : k2-actual_kc; // align blocks with the end of the triangular part for trapezoidal rhs if(IsLower && (k2cols)) { actual_kc = cols-k2; k2 = actual_k2 + actual_kc - kc; } // remaining size Index rs = IsLower ? std::min(cols,actual_k2) : cols - k2; // size of the triangular part Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc; Scalar* geb = blockB+ts*ts; pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs); // pack the triangular part of the rhs padding the unrolled blocks with zeros if(ts>0) { for (Index j2=0; j2(actual_kc-j2, SmallPanelWidth); Index actual_j2 = actual_k2 + j2; Index panelOffset = IsLower ? j2+actualPanelWidth : 0; Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2; // general part pack_rhs_panel(blockB+j2*actual_kc, &rhs(actual_k2+panelOffset, actual_j2), rhsStride, panelLength, actualPanelWidth, actual_kc, panelOffset); // append the triangular part via a temporary buffer for (Index j=0;j0) { for (Index j2=0; j2(actual_kc-j2, SmallPanelWidth); Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth; Index blockOffset = IsLower ? j2 : 0; gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride, blockA, blockB+j2*actual_kc, actual_mc, panelLength, actualPanelWidth, alpha, actual_kc, actual_kc, // strides blockOffset, blockOffset,// offsets allocatedBlockB); // workspace } } gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride, blockA, geb, actual_mc, actual_kc, rs, alpha, -1, -1, 0, 0, allocatedBlockB); } } ei_aligned_stack_delete(Scalar, blockA, kc*mc); ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB); } }; /*************************************************************************** * Wrapper to ei_product_triangular_matrix_matrix ***************************************************************************/ template struct ei_traits > : ei_traits, Lhs, Rhs> > {}; template struct TriangularProduct : public ProductBase, Lhs, Rhs > { EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} template void scaleAndAddTo(Dest& dst, Scalar alpha) const { const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); ei_product_triangular_matrix_matrix::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, (ei_traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, (ei_traits::Flags&RowMajorBit) ? RowMajor : ColMajor> ::run( lhs.rows(), rhs.cols(), lhs.cols(),// LhsIsTriangular ? rhs.cols() : lhs.rows(), // sizes &lhs.coeff(0,0), lhs.outerStride(), // lhs info &rhs.coeff(0,0), rhs.outerStride(), // rhs info &dst.coeffRef(0,0), dst.outerStride(), // result info actualAlpha // alpha ); } }; #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H