about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr>, 2009-01-17 09:48:58 +0000
committer: Gael Guennebaud <g.gael@free.fr>, 2009-01-17 09:48:58 +0000
commit 1eec38dc36aad1dc5574eb8c384ee9b7422c5be8 (patch)
tree 5208faa7dcc02c4eb3d0658082e0846bec83b39b /Eigen/src/Core
parent e556e647f4b49c92dfbd46ffcd49b7956302e312 (diff)
Rewrite the vectorized meta-unroller of sum to reduce instruction
dependencies => significant speed-up
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- Eigen/src/Core/Sum.h 33
1 file changed, 14 insertions, 19 deletions
diff --git a/Eigen/src/Core/Sum.h b/Eigen/src/Core/Sum.h
index e30392534..6d7e9959f 100644
--- a/Eigen/src/Core/Sum.h
+++ b/Eigen/src/Core/Sum.h
@@ -100,18 +100,13 @@ struct ei_sum_novec_unroller<Derived, Start, 1>
};
/*** vectorization ***/
-
-template<typename Derived, int Index, int Stop,
- bool LastPacket = (Stop-Index == ei_packet_traits<typename Derived::Scalar>::size)>
+
+template<typename Derived, int Start, int Length>
struct ei_sum_vec_unroller
{
enum {
- row = int(Derived::Flags)&RowMajorBit
- ? Index / int(Derived::ColsAtCompileTime)
- : Index % Derived::RowsAtCompileTime,
- col = int(Derived::Flags)&RowMajorBit
- ? Index % int(Derived::ColsAtCompileTime)
- : Index / Derived::RowsAtCompileTime
+ PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
+ HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
@@ -120,22 +115,22 @@ struct ei_sum_vec_unroller
inline static PacketScalar run(const Derived &mat)
{
return ei_padd(
- mat.template packet<Aligned>(row, col),
- ei_sum_vec_unroller<Derived, Index+ei_packet_traits<typename Derived::Scalar>::size, Stop>::run(mat)
- );
+ ei_sum_vec_unroller<Derived, Start, HalfLength>::run(mat),
+ ei_sum_vec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat) );
}
};
-template<typename Derived, int Index, int Stop>
-struct ei_sum_vec_unroller<Derived, Index, Stop, true>
+template<typename Derived, int Start>
+struct ei_sum_vec_unroller<Derived, Start, 1>
{
enum {
+ index = Start * ei_packet_traits<typename Derived::Scalar>::size,
row = int(Derived::Flags)&RowMajorBit
- ? Index / int(Derived::ColsAtCompileTime)
- : Index % Derived::RowsAtCompileTime,
+ ? index / int(Derived::ColsAtCompileTime)
+ : index % Derived::RowsAtCompileTime,
col = int(Derived::Flags)&RowMajorBit
- ? Index % int(Derived::ColsAtCompileTime)
- : Index / Derived::RowsAtCompileTime,
+ ? index % int(Derived::ColsAtCompileTime)
+ : index / Derived::RowsAtCompileTime,
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
};
@@ -238,7 +233,7 @@ struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
};
static Scalar run(const Derived& mat)
{
- Scalar res = ei_predux(ei_sum_vec_unroller<Derived, 0, VectorizationSize>::run(mat));
+ Scalar res = ei_predux(ei_sum_vec_unroller<Derived, 0, Size / PacketSize>::run(mat));
if (VectorizationSize != Size)
res += ei_sum_novec_unroller<Derived, VectorizationSize, Size-VectorizationSize>::run(mat);
return res;