diff options
| | | |
|---|---|---|
| author | Gael Guennebaud <g.gael@free.fr> | 2016-08-30 23:16:38 +0200 |
| committer | Gael Guennebaud <g.gael@free.fr> | 2016-08-30 23:16:38 +0200 |
| commit | 8c48d425309821c17e2543c1e0106f308731f8be (patch) | |
| tree | 2a48ae289d9cb287abf02020b09e2a79a416f196 | |
| parent | e7fbbc2748dac4addbc429e053ead795968d86c6 (diff) | |
Fix 4x4 inverse with non-linear destination
-rw-r--r-- | Eigen/src/LU/arch/Inverse_SSE.h | 28 |
-rw-r--r-- | test/prec_inverse_4x4.cpp | 15 |
2 files changed, 31 insertions, 12 deletions
```diff
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h
index e1470c664..ebb64a62b 100644
--- a/Eigen/src/LU/arch/Inverse_SSE.h
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -153,10 +153,12 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
     iC = _mm_mul_ps(rd,iC);
     iD = _mm_mul_ps(rd,iD);
 
-    result.template writePacket<ResultAlignment>( 0, _mm_shuffle_ps(iA,iB,0x77));
-    result.template writePacket<ResultAlignment>( 4, _mm_shuffle_ps(iA,iB,0x22));
-    result.template writePacket<ResultAlignment>( 8, _mm_shuffle_ps(iC,iD,0x77));
-    result.template writePacket<ResultAlignment>(12, _mm_shuffle_ps(iC,iD,0x22));
+    Index res_stride = result.outerStride();
+    float* res = result.data();
+    pstoret<float, Packet4f, ResultAlignment>(res+0, _mm_shuffle_ps(iA,iB,0x77));
+    pstoret<float, Packet4f, ResultAlignment>(res+res_stride, _mm_shuffle_ps(iA,iB,0x22));
+    pstoret<float, Packet4f, ResultAlignment>(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77));
+    pstoret<float, Packet4f, ResultAlignment>(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22));
   }
 
 };
@@ -316,14 +318,16 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
     iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1);
     iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2);
 
-    result.template writePacket<ResultAlignment>( 0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1)); // iA# / det
-    result.template writePacket<ResultAlignment>( 4, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
-    result.template writePacket<ResultAlignment>( 2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1)); // iB# / det
-    result.template writePacket<ResultAlignment>( 6, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
-    result.template writePacket<ResultAlignment>( 8, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1)); // iC# / det
-    result.template writePacket<ResultAlignment>(12, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
-    result.template writePacket<ResultAlignment>(10, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1)); // iD# / det
-    result.template writePacket<ResultAlignment>(14, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
+    Index res_stride = result.outerStride();
+    double* res = result.data();
+    pstoret<double, Packet2d, ResultAlignment>(res+0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+res_stride, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+res_stride+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
   }
 
 };
diff --git a/test/prec_inverse_4x4.cpp b/test/prec_inverse_4x4.cpp
index c4ef2d4bd..eb6ad18c9 100644
--- a/test/prec_inverse_4x4.cpp
+++ b/test/prec_inverse_4x4.cpp
@@ -53,14 +53,29 @@ template<typename MatrixType> void inverse_general_4x4(int repeat)
   // FIXME that 1.25 used to be 1.2 until we tested gcc 4.1 on 30 June 2010 and got 1.21.
   VERIFY(error_avg < (NumTraits<Scalar>::IsComplex ? 8.0 : 1.25));
   VERIFY(error_max < (NumTraits<Scalar>::IsComplex ? 64.0 : 20.0));
+
+  {
+    int s = 5;//internal::random<int>(4,10);
+    int i = 0;//internal::random<int>(0,s-4);
+    int j = 0;//internal::random<int>(0,s-4);
+    Matrix<Scalar,5,5> mat(s,s);
+    mat.setRandom();
+    MatrixType submat = mat.template block<4,4>(i,j);
+    MatrixType mat_inv = mat.template block<4,4>(i,j).inverse();
+    VERIFY_IS_APPROX(mat_inv, submat.inverse());
+    mat.template block<4,4>(i,j) = submat.inverse();
+    VERIFY_IS_APPROX(mat_inv, (mat.template block<4,4>(i,j)));
+  }
 }
 
 void test_prec_inverse_4x4()
 {
   CALL_SUBTEST_1((inverse_permutation_4x4<Matrix4f>()));
   CALL_SUBTEST_1(( inverse_general_4x4<Matrix4f>(200000 * g_repeat) ));
+  CALL_SUBTEST_1(( inverse_general_4x4<Matrix<float,4,4,RowMajor> >(200000 * g_repeat) ));
 
   CALL_SUBTEST_2((inverse_permutation_4x4<Matrix<double,4,4,RowMajor> >()));
+  CALL_SUBTEST_2(( inverse_general_4x4<Matrix<double,4,4,ColMajor> >(200000 * g_repeat) ));
   CALL_SUBTEST_2(( inverse_general_4x4<Matrix<double,4,4,RowMajor> >(200000 * g_repeat) ));
 
   CALL_SUBTEST_3((inverse_permutation_4x4<Matrix4cf>()));
```