aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Gael Guennebaud <g.gael@free.fr> 2019-02-11 17:59:35 +0100
committer Gael Guennebaud <g.gael@free.fr> 2019-02-11 17:59:35 +0100
commit eb46f34a8caff181eb0a25e47eda214ede884b1c (patch)
tree d99747f401ea88fbdc5ac3d437a00fd755ea898f
parent dada863d2388079a91c413a109a0317a5814d2e7 (diff)
Speed up 2x2 LU by a factor of 2, and other small fixed sizes by about 10%.
Not sure that's so critical, but this does not complicate the code base much.
-rw-r--r-- Eigen/src/LU/PartialPivLU.h 22
1 files changed, 19 insertions, 3 deletions
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 94c30616a..12e72880d 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -337,6 +337,9 @@ struct partial_lu_impl
static const int UnBlockedBound = 16;
static const bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound;
static const int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic;
+ // Remaining rows and columns at compile-time:
+ static const int RRows = SizeAtCompileTime==2 ? 1 : Dynamic;
+ static const int RCols = SizeAtCompileTime==2 ? 1 : Dynamic;
typedef Matrix<Scalar, ActualSizeAtCompileTime, ActualSizeAtCompileTime, StorageOrder> MatrixType;
typedef Ref<MatrixType> MatrixTypeRef;
typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > BlockType;
@@ -359,9 +362,12 @@ struct partial_lu_impl
const Index rows = lu.rows();
const Index cols = lu.cols();
const Index size = (std::min)(rows,cols);
+ // For small compile-time matrices it is worth processing the last row separately:
+ // speedup: +100% for 2x2, +10% for others.
+ const Index endk = UnBlockedAtCompileTime ? size-1 : size;
nb_transpositions = 0;
Index first_zero_pivot = -1;
- for(Index k = 0; k < size; ++k)
+ for(Index k = 0; k < endk; ++k)
{
Index rrows = rows-k-1;
Index rcols = cols-k-1;
@@ -383,7 +389,7 @@ struct partial_lu_impl
// FIXME shall we introduce a safe quotient expression in cas 1/lu.coeff(k,k)
// overflow but not the actual quotient?
- lu.col(k).tail(rrows) /= lu.coeff(k,k);
+ lu.col(k).tail(fix<RRows>(rrows)) /= lu.coeff(k,k);
}
else if(first_zero_pivot==-1)
{
@@ -393,8 +399,18 @@ struct partial_lu_impl
}
if(k<rows-1)
- lu.bottomRightCorner(rrows,rcols).noalias() -= lu.col(k).tail(rrows) * lu.row(k).tail(rcols);
+ lu.bottomRightCorner(fix<RRows>(rrows),fix<RCols>(rcols)).noalias() -= lu.col(k).tail(fix<RRows>(rrows)) * lu.row(k).tail(fix<RCols>(rcols));
}
+
+ // special handling of the last entry
+ if(UnBlockedAtCompileTime)
+ {
+ Index k = endk;
+ row_transpositions[k] = PivIndex(k);
+ if (std::abs(lu(k, k)) == 0 && first_zero_pivot == -1)
+ first_zero_pivot = k;
+ }
+
return first_zero_pivot;
}