about summary refs log tree commit diff homepage
path: root/Eigen/src/LU/arch/Inverse_SSE.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/LU/arch/Inverse_SSE.h')
-rw-r--r-- Eigen/src/LU/arch/Inverse_SSE.h 11
1 files changed, 8 insertions, 3 deletions
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h
index 60b7a2376..1f62ef14e 100644
--- a/Eigen/src/LU/arch/Inverse_SSE.h
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -39,9 +39,11 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
ResultAlignment = bool(ResultType::Flags&AlignedBit),
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
};
+ typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
- static void run(const MatrixType& matrix, ResultType& result)
+ static void run(const MatrixType& mat, ResultType& result)
{
+ ActualMatrixType matrix(mat);
EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 };
// Load the full matrix into registers
@@ -167,14 +169,17 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
ResultAlignment = bool(ResultType::Flags&AlignedBit),
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
};
- static void run(const MatrixType& matrix, ResultType& result)
+ typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
+
+ static void run(const MatrixType& mat, ResultType& result)
{
+ ActualMatrixType matrix(mat);
const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
// The inverse is calculated using "Divide and Conquer" technique. The
// original matrix is divide into four 2x2 sub-matrices. Since each
- // register of the matrix holds two element, the smaller matrices are
+ // register of the matrix holds two elements, the smaller matrices are
// consisted of two registers. Hence we get a better locality of the
// calculations.