Fix 4x4 inverse via SSE for submatrices

author: Gael Guennebaud <g.gael@free.fr> 2014-07-31 16:24:29 +0200
committer: Gael Guennebaud <g.gael@free.fr> 2014-07-31 16:24:29 +0200
commit: 26d2cdefd4dc291ba6613d5fd4127f6f1a6b5fa1 (patch)
tree: 8f42f8721705569ad76704eff0e9e13dfb4db8f5
parent: db183ca7b31a366fa1a870927d68c3bf6de15c4d (diff)
3 files changed, 19 insertions, 8 deletions
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h
index 60b7a2376..1f62ef14e 100644
--- a/Eigen/src/LU/arch/Inverse_SSE.h
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -39,9 +39,11 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
     ResultAlignment     = bool(ResultType::Flags&AlignedBit),
     StorageOrdersMatch  = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
   };
+  typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
   
-  static void run(const MatrixType& matrix, ResultType& result)
+  static void run(const MatrixType& mat, ResultType& result)
   {
+    ActualMatrixType matrix(mat);
     EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 };
 
     // Load the full matrix into registers
@@ -167,14 +169,17 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
     ResultAlignment = bool(ResultType::Flags&AlignedBit),
     StorageOrdersMatch  = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
   };
-  static void run(const MatrixType& matrix, ResultType& result)
+  typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
+  
+  static void run(const MatrixType& mat, ResultType& result)
   {
+    ActualMatrixType matrix(mat);
     const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
     const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
 
     // The inverse is calculated using "Divide and Conquer" technique. The
     // original matrix is divide into four 2x2 sub-matrices. Since each
-    // register of the matrix holds two element, the smaller matrices are
+    // register of the matrix holds two elements, the smaller matrices are
     // consisted of two registers. Hence we get a better locality of the
     // calculations.
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c7614e1af..000d16c45 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -220,11 +220,9 @@ ei_add_test(geo_quaternion)
 ei_add_test(geo_eulerangles)
 ei_add_test(geo_parametrizedline)
 ei_add_test(geo_alignedbox)
-if(NOT EIGEN_TEST_EVALUATORS)
-  ei_add_test(geo_hyperplane)
-  ei_add_test(geo_transformations)
-  ei_add_test(geo_homogeneous)
-endif(NOT EIGEN_TEST_EVALUATORS)
+ei_add_test(geo_hyperplane)
+ei_add_test(geo_transformations)
+ei_add_test(geo_homogeneous)
 ei_add_test(stdvector)
 ei_add_test(stdvector_overload)
 ei_add_test(stdlist)
diff --git a/test/inverse.cpp b/test/inverse.cpp
index 8187b088d..1195bcc76 100644
--- a/test/inverse.cpp
+++ b/test/inverse.cpp
@@ -68,6 +68,14 @@ template<typename MatrixType> void inverse(const MatrixType& m)
   VERIFY_IS_MUCH_SMALLER_THAN(abs(det-m3.determinant()), RealScalar(1));
   m3.computeInverseWithCheck(m4, invertible);
   VERIFY( rows==1 ? invertible : !invertible );
+  
+  // check with submatrices
+  {
+    Matrix<Scalar, MatrixType::RowsAtCompileTime+1, MatrixType::RowsAtCompileTime+1, MatrixType::Options> m3;
+    m3.setRandom();
+    m2 = m3.template topLeftCorner<MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime>().inverse();
+    VERIFY_IS_APPROX( (m3.template topLeftCorner<MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime>()), m2.inverse() );
+  }
 #endif
 
   // check in-place inversion
author	Gael Guennebaud <g.gael@free.fr>	2014-07-31 16:24:29 +0200
committer	Gael Guennebaud <g.gael@free.fr>	2014-07-31 16:24:29 +0200
commit	26d2cdefd4dc291ba6613d5fd4127f6f1a6b5fa1 (patch)
tree	8f42f8721705569ad76704eff0e9e13dfb4db8f5
parent	db183ca7b31a366fa1a870927d68c3bf6de15c4d (diff)