diff options
author | 2008-08-22 17:48:36 +0000 | |
---|---|---|
committer | 2008-08-22 17:48:36 +0000 | |
commit | f0394edfa7d063e37256e673cdecacd9f55f44ae (patch) | |
tree | a2e36374f3f51fc74f0d64c403f4368c8ae55d54 /Eigen/src/Core | |
parent | a95c1e190b6963543950f4b7831fcc0b844bd95f (diff) |
* bugfix in SolveTriangular found by Timothy Hunter (did not compile for very small fixed-size matrices)
* bugfix in Dot unroller
* added a special random generator for the unit tests and reduced the tolerance threshold by an order of magnitude;
this fixes issues with sum.cpp, but other tests still fail sometimes, so this has to be carefully checked...
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/Dot.h | 13 | ||||
-rwxr-xr-x | Eigen/src/Core/SolveTriangular.h | 3 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/PacketMath.h | 4 |
3 files changed, 14 insertions, 6 deletions
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index c0caf8c06..eb25185b6 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -221,11 +221,18 @@ template<typename Derived1, typename Derived2> struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling> { typedef typename Derived1::Scalar Scalar; + typedef typename ei_packet_traits<Scalar>::type PacketScalar; + enum { + PacketSize = ei_packet_traits<Scalar>::size, + Size = Derived1::SizeAtCompileTime, + VectorizationSize = (Size / PacketSize) * PacketSize + }; static Scalar run(const Derived1& v1, const Derived2& v2) { - return ei_predux( - ei_dot_vec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>::run(v1, v2) - ); + Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizationSize>::run(v1, v2)); + if (VectorizationSize != Size) + res += ei_dot_novec_unroller<Derived1, Derived2, VectorizationSize, Size>::run(v1, v2); + return res; } }; diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 44edb46c1..2664bff38 100755 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -95,7 +95,8 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor> int endBlock = startBlock + (IsLower ? 4 : -4); /* Process the i cols times 4 rows block, and keep the result in a temporary vector */ - Matrix<Scalar,4,1> btmp; + // FIXME use fixed size block but take care to small fixed size matrices... 
+ Matrix<Scalar,Dynamic,1> btmp(4); if (IsLower) btmp = lhs.block(startBlock,0,4,i) * other.col(c).start(i); else diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index f2744e340..ede223a0c 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -220,7 +220,7 @@ struct ei_palign_impl<Offset,__m128> inline static void run(__m128& first, const __m128& second) { if (Offset!=0) - first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), (Offset)*4)); + first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4)); } }; @@ -230,7 +230,7 @@ struct ei_palign_impl<Offset,__m128i> inline static void run(__m128i& first, const __m128i& second) { if (Offset!=0) - first = _mm_alignr_epi8(second,first, (Offset)*4); + first = _mm_alignr_epi8(second,first, Offset*4); } }; |