about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/LU/Determinant.h
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2019-02-18 16:21:27 +0100
committer: Gael Guennebaud <g.gael@free.fr> 2019-02-18 16:21:27 +0100
commit: 796db94e6e82548a7594c00b4ae83efbe76baffc (patch)
tree: a74a5abd26d70f60a5d864e0259166693ff472f4 /Eigen/src/LU/Determinant.h
parent: 31b6e080a9235f6275012ac69f0938b211444edf (diff)
bug #1194: implement slightly faster and SIMD friendly 4x4 determinant.
Diffstat (limited to 'Eigen/src/LU/Determinant.h')
-rw-r--r-- Eigen/src/LU/Determinant.h 43
1 files changed, 26 insertions, 17 deletions
diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h
index 6af63a6e7..3a41e6fcb 100644
--- a/Eigen/src/LU/Determinant.h
+++ b/Eigen/src/LU/Determinant.h
@@ -23,15 +23,6 @@ inline const typename Derived::Scalar bruteforce_det3_helper
* (matrix.coeff(1,b) * matrix.coeff(2,c) - matrix.coeff(1,c) * matrix.coeff(2,b));
}
-template<typename Derived>
-EIGEN_DEVICE_FUNC
-const typename Derived::Scalar bruteforce_det4_helper
-(const MatrixBase<Derived>& matrix, int j, int k, int m, int n)
-{
- return (matrix.coeff(j,0) * matrix.coeff(k,1) - matrix.coeff(k,0) * matrix.coeff(j,1))
- * (matrix.coeff(m,2) * matrix.coeff(n,3) - matrix.coeff(n,2) * matrix.coeff(m,3));
-}
-
template<typename Derived,
int DeterminantType = Derived::RowsAtCompileTime
> struct determinant_impl
@@ -75,16 +66,34 @@ template<typename Derived> struct determinant_impl<Derived, 3>
template<typename Derived> struct determinant_impl<Derived, 4>
{
+ typedef typename traits<Derived>::Scalar Scalar;
static EIGEN_DEVICE_FUNC
- typename traits<Derived>::Scalar run(const Derived& m)
+ Scalar run(const Derived& m)
+ {
+ Scalar d2_01 = det2(m, 0, 1);
+ Scalar d2_02 = det2(m, 0, 2);
+ Scalar d2_03 = det2(m, 0, 3);
+ Scalar d2_12 = det2(m, 1, 2);
+ Scalar d2_13 = det2(m, 1, 3);
+ Scalar d2_23 = det2(m, 2, 3);
+ Scalar d3_0 = det3(m, 1,d2_23, 2,d2_13, 3,d2_12);
+ Scalar d3_1 = det3(m, 0,d2_23, 2,d2_03, 3,d2_02);
+ Scalar d3_2 = det3(m, 0,d2_13, 1,d2_03, 3,d2_01);
+ Scalar d3_3 = det3(m, 0,d2_12, 1,d2_02, 2,d2_01);
+ return internal::pmadd(-m(0,3),d3_0, m(1,3)*d3_1) +
+ internal::pmadd(-m(2,3),d3_2, m(3,3)*d3_3);
+ }
+protected:
+ static EIGEN_DEVICE_FUNC
+ Scalar det2(const Derived& m, Index i0, Index i1)
+ {
+ return m(i0,0) * m(i1,1) - m(i1,0) * m(i0,1);
+ }
+
+ static EIGEN_DEVICE_FUNC
+ Scalar det3(const Derived& m, Index i0, const Scalar& d0, Index i1, const Scalar& d1, Index i2, const Scalar& d2)
{
- // trick by Martin Costabel to compute 4x4 det with only 30 muls
- return bruteforce_det4_helper(m,0,1,2,3)
- - bruteforce_det4_helper(m,0,2,1,3)
- + bruteforce_det4_helper(m,0,3,1,2)
- + bruteforce_det4_helper(m,1,2,0,3)
- - bruteforce_det4_helper(m,1,3,0,2)
- + bruteforce_det4_helper(m,2,3,0,1);
+ return internal::pmadd(m(i0,2), d0, internal::pmadd(-m(i1,2), d1, m(i2,2)*d2));
}
};