aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/Dot.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2008-08-09 18:41:24 +0000
committerGravatar Gael Guennebaud <g.gael@free.fr>2008-08-09 18:41:24 +0000
commit4fa40367e9bf55ea8b2ad1040b3fb73f94e4f12f (patch)
tree3ca6d7cff691daf2d6bc8d6b1ecb00971f9debf3 /Eigen/src/Core/Dot.h
parentbecbeda50ac17288dba0a93c6adc67b663d32a7a (diff)
* Big change in Block and Map:
- added a MapBase base xpr on top of which Map and the specialization of Block are implemented - MapBase forces both aligned loads (and aligned stores, see below) in expressions such as "x.block(...) += other_expr" * Significant vectorization improvement: - added a AlignedBit flag meaning the first coeff/packet is aligned, this allows to not generate extra code to deal with the first unaligned part - removed all unaligned stores when no unrolling - removed unaligned loads in Sum when the input as the DirectAccessBit flag * Some code simplification in CacheFriendly product * Some minor documentation improvements
Diffstat (limited to 'Eigen/src/Core/Dot.h')
-rw-r--r--Eigen/src/Core/Dot.h16
1 files changed, 10 insertions, 6 deletions
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index a5d2f0ba3..9bdff50b3 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -123,7 +123,9 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>
row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index,
col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0,
row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index,
- col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0
+ col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0,
+ alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned,
+ alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned
};
typedef typename Derived1::Scalar Scalar;
@@ -131,7 +133,7 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>
inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
{
- return ei_pmul(v1.template packet<Aligned>(row1, col1), v2.template packet<Aligned>(row2, col2));
+ return ei_pmul(v1.template packet<alignment1>(row1, col1), v2.template packet<alignment2>(row2, col2));
}
};
@@ -175,20 +177,22 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
const int size = v1.size();
const int packetSize = ei_packet_traits<Scalar>::size;
const int alignedSize = (size/packetSize)*packetSize;
+ const int alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned;
+ const int alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned;
Scalar res;
// do the vectorizable part of the sum
if(size >= packetSize)
{
PacketScalar packet_res = ei_pmul(
- v1.template packet<Aligned>(0),
- v2.template packet<Aligned>(0)
+ v1.template packet<alignment1>(0),
+ v2.template packet<alignment2>(0)
);
for(int index = packetSize; index<alignedSize; index += packetSize)
{
packet_res = ei_pmadd(
- v1.template packet<Aligned>(index),
- v2.template packet<Aligned>(index),
+ v1.template packet<alignment1>(index),
+ v2.template packet<alignment2>(index),
packet_res
);
}