From 26d2cdefd4dc291ba6613d5fd4127f6f1a6b5fa1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 31 Jul 2014 16:24:29 +0200 Subject: Fix 4x4 inverse via SSE for submatrices --- Eigen/src/LU/arch/Inverse_SSE.h | 11 ++++++++--- test/CMakeLists.txt | 8 +++----- test/inverse.cpp | 8 ++++++++ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h index 60b7a2376..1f62ef14e 100644 --- a/Eigen/src/LU/arch/Inverse_SSE.h +++ b/Eigen/src/LU/arch/Inverse_SSE.h @@ -39,9 +39,11 @@ struct compute_inverse_size4 ResultAlignment = bool(ResultType::Flags&AlignedBit), StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; + typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; - static void run(const MatrixType& matrix, ResultType& result) + static void run(const MatrixType& mat, ResultType& result) { + ActualMatrixType matrix(mat); EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 }; // Load the full matrix into registers @@ -167,14 +169,17 @@ struct compute_inverse_size4 ResultAlignment = bool(ResultType::Flags&AlignedBit), StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; - static void run(const MatrixType& matrix, ResultType& result) + typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; + + static void run(const MatrixType& mat, ResultType& result) { + ActualMatrixType matrix(mat); const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); // The inverse is calculated using "Divide and Conquer" technique. The // original matrix is divide into four 2x2 sub-matrices. Since each - // register of the matrix holds two element, the smaller matrices are + // register of the matrix holds two elements, the smaller matrices are // consisted of two registers. Hence we get a better locality of the // calculations. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c7614e1af..000d16c45 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -220,11 +220,9 @@ ei_add_test(geo_quaternion) ei_add_test(geo_eulerangles) ei_add_test(geo_parametrizedline) ei_add_test(geo_alignedbox) -if(NOT EIGEN_TEST_EVALUATORS) - ei_add_test(geo_hyperplane) - ei_add_test(geo_transformations) - ei_add_test(geo_homogeneous) -endif(NOT EIGEN_TEST_EVALUATORS) +ei_add_test(geo_hyperplane) +ei_add_test(geo_transformations) +ei_add_test(geo_homogeneous) ei_add_test(stdvector) ei_add_test(stdvector_overload) ei_add_test(stdlist) diff --git a/test/inverse.cpp b/test/inverse.cpp index 8187b088d..1195bcc76 100644 --- a/test/inverse.cpp +++ b/test/inverse.cpp @@ -68,6 +68,14 @@ template void inverse(const MatrixType& m) VERIFY_IS_MUCH_SMALLER_THAN(abs(det-m3.determinant()), RealScalar(1)); m3.computeInverseWithCheck(m4, invertible); VERIFY( rows==1 ? invertible : !invertible ); + + // check with submatrices + { + Matrix m3; + m3.setRandom(); + m2 = m3.template topLeftCorner().inverse(); + VERIFY_IS_APPROX( (m3.template topLeftCorner()), m2.inverse() ); + } #endif // check in-place inversion -- cgit v1.2.3