diff options
Diffstat (limited to 'Eigen/src/LU/arch/Inverse_SSE.h')
-rw-r--r-- | Eigen/src/LU/arch/Inverse_SSE.h | 11 |
1 files changed, 8 insertions, 3 deletions
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h index 60b7a2376..1f62ef14e 100644 --- a/Eigen/src/LU/arch/Inverse_SSE.h +++ b/Eigen/src/LU/arch/Inverse_SSE.h @@ -39,9 +39,11 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType> ResultAlignment = bool(ResultType::Flags&AlignedBit), StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; + typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; - static void run(const MatrixType& matrix, ResultType& result) + static void run(const MatrixType& mat, ResultType& result) { + ActualMatrixType matrix(mat); EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 }; // Load the full matrix into registers @@ -167,14 +169,17 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType> ResultAlignment = bool(ResultType::Flags&AlignedBit), StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; - static void run(const MatrixType& matrix, ResultType& result) + typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; + + static void run(const MatrixType& mat, ResultType& result) { + ActualMatrixType matrix(mat); const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); // The inverse is calculated using "Divide and Conquer" technique. The // original matrix is divide into four 2x2 sub-matrices. Since each - // register of the matrix holds two element, the smaller matrices are + // register of the matrix holds two elements, the smaller matrices are // consisted of two registers. Hence we get a better locality of the // calculations. |