* make PartialLU avoid to generate inf/nan when given a singular matrix

(result undefined, but at least it won't take forever on intel 387) * add lots of comments, especially to LU.h * fix stuff I had broken in Inverse.h * split inverse test
author: Benoit Jacob <jacob.benoit.1@gmail.com> 2009-10-20 00:36:07 -0400
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2009-10-20 00:36:07 -0400
commit: 13f31b8daf4d67d6310c567de36a34afa7c6e18f (patch)
tree: ee7ff4bad04d312e81c70c15a08b8d32505bd5f3
parent: d1db1352f5632281a822fd0f1f59dd7d96b876a1 (diff)
5 files changed, 82 insertions, 27 deletions
diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/Inverse.h
index f1154a56b..b4e10b023 100644
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/Inverse.h
@@ -200,7 +200,7 @@ struct ei_compute_inverse
 {
   static inline void run(const MatrixType& matrix, ResultType* result)
   {
-    result = matrix.partialLu().inverse();
+    *result = matrix.partialLu().inverse();
   }
 };
 
@@ -282,7 +282,9 @@ inline void MatrixBase<Derived>::computeInverse(ResultType *result) const
 template<typename Derived>
 inline const typename MatrixBase<Derived>::PlainMatrixType MatrixBase<Derived>::inverse() const
 {
-  return inverse(*this);
+  typename MatrixBase<Derived>::PlainMatrixType result;
+  computeInverse(&result);
+  return result;
 }
 
 
diff --git a/Eigen/src/LU/LU.h b/Eigen/src/LU/LU.h
index 455a7d67e..2baa71f67 100644
--- a/Eigen/src/LU/LU.h
+++ b/Eigen/src/LU/LU.h
@@ -410,28 +410,32 @@ LU<MatrixType>& LU<MatrixType>::compute(const MatrixType& matrix)
   const int rows = matrix.rows();
   const int cols = matrix.cols();
 
+  // will store the transpositions, before we accumulate them at the end.
+  // can't accumulate on-the-fly because that will be done in reverse order for the rows.
   IntColVectorType rows_transpositions(matrix.rows());
   IntRowVectorType cols_transpositions(matrix.cols());
-  int number_of_transpositions = 0;
+  int number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. rows_transpositions[i]!=i
 
-  RealScalar biggest = RealScalar(0);
-  m_nonzero_pivots = size;
+  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
   m_maxpivot = RealScalar(0);
   for(int k = 0; k < size; ++k)
   {
+    // First, we need to find the pivot.
+
+    // biggest coefficient in the remaining bottom-right corner (starting at row k, col k)
     int row_of_biggest_in_corner, col_of_biggest_in_corner;
     RealScalar biggest_in_corner;
-
     biggest_in_corner = m_lu.corner(Eigen::BottomRight, rows-k, cols-k)
                         .cwise().abs()
                         .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);
-    row_of_biggest_in_corner += k;
-    col_of_biggest_in_corner += k;
-    if(k==0) biggest = biggest_in_corner;
+    row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,
+    col_of_biggest_in_corner += k; // need to add k to them.
 
-    // if the corner is exactly zero, terminate to avoid generating nan/inf values
+    // if the pivot (hence the corner) is exactly zero, terminate to avoid generating nan/inf values
     if(biggest_in_corner == RealScalar(0))
     {
+      // before exiting, make sure to initialize the still uninitialized row_transpositions
+      // in a sane state without destroying what we already have.
       m_nonzero_pivots = k;
       for(int i = k; i < size; i++)
       {
@@ -443,6 +447,9 @@ LU<MatrixType>& LU<MatrixType>::compute(const MatrixType& matrix)
 
     if(biggest_in_corner > m_maxpivot) m_maxpivot = biggest_in_corner;
 
+    // Now that we've found the pivot, we need to apply the row/col swaps to
+    // bring it to the location (k,k).
+
     rows_transpositions.coeffRef(k) = row_of_biggest_in_corner;
     cols_transpositions.coeffRef(k) = col_of_biggest_in_corner;
     if(k != row_of_biggest_in_corner) {
@@ -453,12 +460,19 @@ LU<MatrixType>& LU<MatrixType>::compute(const MatrixType& matrix)
       m_lu.col(k).swap(m_lu.col(col_of_biggest_in_corner));
       ++number_of_transpositions;
     }
+
+    // Now that the pivot is at the right location, we update the remaining
+    // bottom-right corner by Gaussian elimination.
+    
     if(k<rows-1)
       m_lu.col(k).end(rows-k-1) /= m_lu.coeff(k,k);
     if(k<size-1)
       m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).end(rows-k-1) * m_lu.row(k).end(cols-k-1);
   }
 
+  // the main loop is over, we still have to accumulate the transpositions to find the
+  // permutations P and Q
+
   for(int k = 0; k < matrix.rows(); ++k) m_p.coeffRef(k) = k;
   for(int k = size-1; k >= 0; --k)
     std::swap(m_p.coeffRef(k), m_p.coeffRef(rows_transpositions.coeff(k)));
@@ -549,7 +563,10 @@ struct ei_lu_kernel_impl : public ReturnByValue<ei_lu_kernel_impl<MatrixType> >
       if(ei_abs(m_lu.matrixLU().coeff(i,i)) > premultiplied_threshold)
         pivots.coeffRef(p++) = i;
     ei_assert(p == m_rank && "You hit a bug in Eigen! Please report (backtrace and matrix)!");
-    
+
+    // we construct a temporaty trapezoid matrix m, by taking the U matrix and
+    // permuting the rows and cols to bring the nonnegligible pivots to the top of
+    // the main diagonal. We need that to be able to apply our triangular solvers.
     // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified
     Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options,
            LUType::MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime>
@@ -560,18 +577,22 @@ struct ei_lu_kernel_impl : public ReturnByValue<ei_lu_kernel_impl<MatrixType> >
       m.row(i).end(cols-i) = m_lu.matrixLU().row(pivots.coeff(i)).end(cols-i);
     }
     m.block(0, 0, m_rank, m_rank).template triangularView<StrictlyLowerTriangular>().setZero();
-    
     for(int i = 0; i < m_rank; ++i)
       m.col(i).swap(m.col(pivots.coeff(i)));
 
+    // ok, we have our trapezoid matrix, we can apply the triangular solver.
+    // notice that the math behind this suggests that we should apply this to the
+    // negative of the RHS, but for performance we just put the negative sign elsewhere, see below.
     m.corner(TopLeft, m_rank, m_rank)
      .template triangularView<UpperTriangular>().solveInPlace(
         m.corner(TopRight, m_rank, dimker)
       );
 
+    // now we must undo the column permutation that we had applied!
     for(int i = m_rank-1; i >= 0; --i)
       m.col(i).swap(m.col(pivots.coeff(i)));
 
+    // see the negative sign in the next line, that's what we were talking about above.
     for(int i = 0; i < m_rank; ++i) dst.row(m_lu.permutationQ().coeff(i)) = -m.row(i).end(dimker);
     for(int i = m_rank; i < cols; ++i) dst.row(m_lu.permutationQ().coeff(i)).setZero();
     for(int k = 0; k < dimker; ++k) dst.coeffRef(m_lu.permutationQ().coeff(m_rank+k), k) = Scalar(1);
diff --git a/Eigen/src/LU/PartialLU.h b/Eigen/src/LU/PartialLU.h
index e467c62f0..3675b0309 100644
--- a/Eigen/src/LU/PartialLU.h
+++ b/Eigen/src/LU/PartialLU.h
@@ -216,6 +216,7 @@ struct ei_partial_lu_impl
   typedef Map<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > MapLU;
   typedef Block<MapLU, Dynamic, Dynamic> MatrixType;
   typedef Block<MatrixType,Dynamic,Dynamic> BlockType;
+  typedef typename MatrixType::RealScalar RealScalar;
 
   /** \internal performs the LU decomposition in-place of the matrix \a lu
     * using an unblocked algorithm.
@@ -224,8 +225,12 @@ struct ei_partial_lu_impl
     * vector \a row_transpositions which must have a size equal to the number
     * of columns of the matrix \a lu, and an integer \a nb_transpositions
     * which returns the actual number of transpositions.
+    *
+    * \returns false if some pivot is exactly zero, in which case the matrix is left with
+    *          undefined coefficients (to avoid generating inf/nan values). Returns true
+    *          otherwise.
     */
-  static void unblocked_lu(MatrixType& lu, int* row_transpositions, int& nb_transpositions)
+  static bool unblocked_lu(MatrixType& lu, int* row_transpositions, int& nb_transpositions)
   {
     const int rows = lu.rows();
     const int size = std::min(lu.rows(),lu.cols());
@@ -233,9 +238,22 @@ struct ei_partial_lu_impl
     for(int k = 0; k < size; ++k)
     {
       int row_of_biggest_in_col;
-      lu.col(k).end(rows-k).cwise().abs().maxCoeff(&row_of_biggest_in_col);
+      RealScalar biggest_in_corner
+        = lu.col(k).end(rows-k).cwise().abs().maxCoeff(&row_of_biggest_in_col);
       row_of_biggest_in_col += k;
 
+      if(biggest_in_corner == 0) // the pivot is exactly zero: the matrix is singular
+      {
+        // end quickly, avoid generating inf/nan values. Although in this unblocked_lu case
+        // the result is still valid, there's no need to boast about it because
+        // the blocked_lu code can't guarantee the same.
+        // before exiting, make sure to initialize the still uninitialized row_transpositions
+        // in a sane state without destroying what we already have.
+        for(int i = k; i < size; i++)
+          row_transpositions[i] = i;
+        return false;
+      }
+
       row_transpositions[k] = row_of_biggest_in_col;
 
       if(k != row_of_biggest_in_col)
@@ -252,6 +270,7 @@ struct ei_partial_lu_impl
         lu.corner(BottomRight,rrows,rsize).noalias() -= lu.col(k).end(rrows) * lu.row(k).end(rsize);
       }
     }
+    return true;
   }
 
   /** \internal performs the LU decomposition in-place of the matrix represented
@@ -263,11 +282,15 @@ struct ei_partial_lu_impl
     * of columns of the matrix \a lu, and an integer \a nb_transpositions
     * which returns the actual number of transpositions.
     *
+    * \returns false if some pivot is exactly zero, in which case the matrix is left with
+    *          undefined coefficients (to avoid generating inf/nan values). Returns true
+    *          otherwise.
+    *
     * \note This very low level interface using pointers, etc. is to:
     *   1 - reduce the number of instanciations to the strict minimum
     *   2 - avoid infinite recursion of the instanciations with Block<Block<Block<...> > >
     */
-  static void blocked_lu(int rows, int cols, Scalar* lu_data, int luStride, int* row_transpositions, int& nb_transpositions, int maxBlockSize=256)
+  static bool blocked_lu(int rows, int cols, Scalar* lu_data, int luStride, int* row_transpositions, int& nb_transpositions, int maxBlockSize=256)
   {
     MapLU lu1(lu_data,StorageOrder==RowMajor?rows:luStride,StorageOrder==RowMajor?luStride:cols);
     MatrixType lu(lu1,0,0,rows,cols);
@@ -277,8 +300,7 @@ struct ei_partial_lu_impl
     // if the matrix is too small, no blocking:
     if(size<=16)
     {
-      unblocked_lu(lu, row_transpositions, nb_transpositions);
-      return;
+      return unblocked_lu(lu, row_transpositions, nb_transpositions);
     }
 
     // automatically adjust the number of subdivisions to the size
@@ -311,12 +333,20 @@ struct ei_partial_lu_impl
       int nb_transpositions_in_panel;
       // recursively calls the blocked LU algorithm with a very small
       // blocking size:
-      blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride,
-                 row_transpositions+k, nb_transpositions_in_panel, 16);
+      if(!blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride,
+                   row_transpositions+k, nb_transpositions_in_panel, 16))
+      {
+        // end quickly with undefined coefficients, just avoid generating inf/nan values.
+        // before exiting, make sure to initialize the still uninitialized row_transpositions
+        // in a sane state without destroying what we already have.
+        for(int i=k; i<size; ++i)
+          row_transpositions[i] = i;
+        return false;
+      }
       nb_transpositions += nb_transpositions_in_panel;
 
       // update permutations and apply them to A10
-      for(int i=k;i<k+bs; ++i)
+      for(int i=k; i<k+bs; ++i)
       {
         int piv = (row_transpositions[i] += k);
         A_0.row(i).swap(A_0.row(piv));
@@ -334,6 +364,7 @@ struct ei_partial_lu_impl
         A22 -= A21 * A12;
       }
     }
+    return true;
   }
 };
 
diff --git a/doc/snippets/LU_solve.cpp b/doc/snippets/LU_solve.cpp
index 301074305..ade269789 100644
--- a/doc/snippets/LU_solve.cpp
+++ b/doc/snippets/LU_solve.cpp
@@ -9,4 +9,3 @@ if((m*x).isApprox(y))
 }
 else
   cout << "The equation mx=y does not have any solution." << endl;
-
diff --git a/test/inverse.cpp b/test/inverse.cpp
index 65dfbc73e..6fc65786c 100644
--- a/test/inverse.cpp
+++ b/test/inverse.cpp
@@ -83,19 +83,21 @@ void test_inverse()
 {
   int s;
   for(int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST( inverse(Matrix<double,1,1>()) );
-    CALL_SUBTEST( inverse(Matrix2d()) );
-    CALL_SUBTEST( inverse(Matrix3f()) );
-    CALL_SUBTEST( inverse(Matrix4f()) );
+    CALL_SUBTEST1( inverse(Matrix<double,1,1>()) );
+    CALL_SUBTEST2( inverse(Matrix2d()) );
+    CALL_SUBTEST3( inverse(Matrix3f()) );
+    CALL_SUBTEST4( inverse(Matrix4f()) );
     s = ei_random<int>(50,320);
-    CALL_SUBTEST( inverse(MatrixXf(s,s)) );
+    CALL_SUBTEST5( inverse(MatrixXf(s,s)) );
     s = ei_random<int>(25,100);
-    CALL_SUBTEST( inverse(MatrixXcd(s,s)) );
+    CALL_SUBTEST6( inverse(MatrixXcd(s,s)) );
   }
 
+#ifdef EIGEN_TEST_PART_4
   // test some tricky cases for 4x4 matrices
   VERIFY_IS_APPROX((Matrix4f() << 0,0,1,0, 1,0,0,0, 0,1,0,0, 0,0,0,1).finished().inverse(),
                    (Matrix4f() << 0,1,0,0, 0,0,1,0, 1,0,0,0, 0,0,0,1).finished());
   VERIFY_IS_APPROX((Matrix4f() << 1,0,0,0, 0,0,1,0, 0,0,0,1, 0,1,0,0).finished().inverse(),
                    (Matrix4f() << 1,0,0,0, 0,0,0,1, 0,1,0,0, 0,0,1,0).finished());
+#endif
 }
author	Benoit Jacob <jacob.benoit.1@gmail.com>	2009-10-20 00:36:07 -0400
committer	Benoit Jacob <jacob.benoit.1@gmail.com>	2009-10-20 00:36:07 -0400
commit	13f31b8daf4d67d6310c567de36a34afa7c6e18f (patch)
tree	ee7ff4bad04d312e81c70c15a08b8d32505bd5f3
parent	d1db1352f5632281a822fd0f1f59dd7d96b876a1 (diff)