diff options
author | Gael Guennebaud <g.gael@free.fr> | 2019-02-11 17:59:35 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2019-02-11 17:59:35 +0100 |
commit | eb46f34a8caff181eb0a25e47eda214ede884b1c (patch) | |
tree | d99747f401ea88fbdc5ac3d437a00fd755ea898f | |
parent | dada863d2388079a91c413a109a0317a5814d2e7 (diff) |
Speed up 2x2 LU by a factor 2, and other small fixed sizes by about 10%.
Not sure that's so critical, but this does not complexify the code base much.
-rw-r--r-- | Eigen/src/LU/PartialPivLU.h | 22 |
1 files changed, 19 insertions, 3 deletions
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 94c30616a..12e72880d 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -337,6 +337,9 @@ struct partial_lu_impl static const int UnBlockedBound = 16; static const bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound; static const int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic; + // Remaining rows and columns at compile-time: + static const int RRows = SizeAtCompileTime==2 ? 1 : Dynamic; + static const int RCols = SizeAtCompileTime==2 ? 1 : Dynamic; typedef Matrix<Scalar, ActualSizeAtCompileTime, ActualSizeAtCompileTime, StorageOrder> MatrixType; typedef Ref<MatrixType> MatrixTypeRef; typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > BlockType; @@ -359,9 +362,12 @@ struct partial_lu_impl const Index rows = lu.rows(); const Index cols = lu.cols(); const Index size = (std::min)(rows,cols); + // For small compile-time matrices it is worth processing the last row separately: + // speedup: +100% for 2x2, +10% for others. + const Index endk = UnBlockedAtCompileTime ? size-1 : size; nb_transpositions = 0; Index first_zero_pivot = -1; - for(Index k = 0; k < size; ++k) + for(Index k = 0; k < endk; ++k) { Index rrows = rows-k-1; Index rcols = cols-k-1; @@ -383,7 +389,7 @@ struct partial_lu_impl // FIXME shall we introduce a safe quotient expression in cas 1/lu.coeff(k,k) // overflow but not the actual quotient? - lu.col(k).tail(rrows) /= lu.coeff(k,k); + lu.col(k).tail(fix<RRows>(rrows)) /= lu.coeff(k,k); } else if(first_zero_pivot==-1) { @@ -393,8 +399,18 @@ struct partial_lu_impl } if(k<rows-1) - lu.bottomRightCorner(rrows,rcols).noalias() -= lu.col(k).tail(rrows) * lu.row(k).tail(rcols); + lu.bottomRightCorner(fix<RRows>(rrows),fix<RCols>(rcols)).noalias() -= lu.col(k).tail(fix<RRows>(rrows)) * lu.row(k).tail(fix<RCols>(rcols)); } + + // special handling of the last entry + if(UnBlockedAtCompileTime) + { + Index k = endk; + row_transpositions[k] = PivIndex(k); + if (std::abs(lu(k, k)) == 0 && first_zero_pivot == -1) + first_zero_pivot = k; + } + return first_zero_pivot; } |