diff options
author | Gael Guennebaud <g.gael@free.fr> | 2019-02-18 16:21:27 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2019-02-18 16:21:27 +0100 |
commit | 796db94e6e82548a7594c00b4ae83efbe76baffc (patch) | |
tree | a74a5abd26d70f60a5d864e0259166693ff472f4 /Eigen/src/LU/Determinant.h | |
parent | 31b6e080a9235f6275012ac69f0938b211444edf (diff) |
bug #1194: implement slightly faster and SIMD friendly 4x4 determinant.
Diffstat (limited to 'Eigen/src/LU/Determinant.h')
-rw-r--r-- | Eigen/src/LU/Determinant.h | 43 |
1 files changed, 26 insertions, 17 deletions
```diff
diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h
index 6af63a6e7..3a41e6fcb 100644
--- a/Eigen/src/LU/Determinant.h
+++ b/Eigen/src/LU/Determinant.h
@@ -23,15 +23,6 @@ inline const typename Derived::Scalar bruteforce_det3_helper
     * (matrix.coeff(1,b) * matrix.coeff(2,c) - matrix.coeff(1,c) * matrix.coeff(2,b));
 }
 
-template<typename Derived>
-EIGEN_DEVICE_FUNC
-const typename Derived::Scalar bruteforce_det4_helper
-(const MatrixBase<Derived>& matrix, int j, int k, int m, int n)
-{
-  return (matrix.coeff(j,0) * matrix.coeff(k,1) - matrix.coeff(k,0) * matrix.coeff(j,1))
-       * (matrix.coeff(m,2) * matrix.coeff(n,3) - matrix.coeff(n,2) * matrix.coeff(m,3));
-}
-
 template<typename Derived,
          int DeterminantType = Derived::RowsAtCompileTime
 > struct determinant_impl
@@ -75,16 +66,34 @@ template<typename Derived> struct determinant_impl<Derived, 3>
 
 template<typename Derived> struct determinant_impl<Derived, 4>
 {
+  typedef typename traits<Derived>::Scalar Scalar;
   static EIGEN_DEVICE_FUNC
-  typename traits<Derived>::Scalar run(const Derived& m)
+  Scalar run(const Derived& m)
+  {
+    Scalar d2_01 = det2(m, 0, 1);
+    Scalar d2_02 = det2(m, 0, 2);
+    Scalar d2_03 = det2(m, 0, 3);
+    Scalar d2_12 = det2(m, 1, 2);
+    Scalar d2_13 = det2(m, 1, 3);
+    Scalar d2_23 = det2(m, 2, 3);
+    Scalar d3_0 = det3(m, 1,d2_23, 2,d2_13, 3,d2_12);
+    Scalar d3_1 = det3(m, 0,d2_23, 2,d2_03, 3,d2_02);
+    Scalar d3_2 = det3(m, 0,d2_13, 1,d2_03, 3,d2_01);
+    Scalar d3_3 = det3(m, 0,d2_12, 1,d2_02, 2,d2_01);
+    return internal::pmadd(-m(0,3),d3_0, m(1,3)*d3_1) +
+           internal::pmadd(-m(2,3),d3_2, m(3,3)*d3_3);
+  }
+protected:
+  static EIGEN_DEVICE_FUNC
+  Scalar det2(const Derived& m, Index i0, Index i1)
+  {
+    return m(i0,0) * m(i1,1) - m(i1,0) * m(i0,1);
+  }
+
+  static EIGEN_DEVICE_FUNC
+  Scalar det3(const Derived& m, Index i0, const Scalar& d0, Index i1, const Scalar& d1, Index i2, const Scalar& d2)
   {
-    // trick by Martin Costabel to compute 4x4 det with only 30 muls
-    return bruteforce_det4_helper(m,0,1,2,3)
-          - bruteforce_det4_helper(m,0,2,1,3)
-          + bruteforce_det4_helper(m,0,3,1,2)
-          + bruteforce_det4_helper(m,1,2,0,3)
-          - bruteforce_det4_helper(m,1,3,0,2)
-          + bruteforce_det4_helper(m,2,3,0,1);
+    return internal::pmadd(m(i0,2), d0, internal::pmadd(-m(i1,2), d1, m(i2,2)*d2));
   }
 };
 
```