diff options
author | Gael Guennebaud <g.gael@free.fr> | 2008-08-09 18:41:24 +0000 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2008-08-09 18:41:24 +0000 |
commit | 4fa40367e9bf55ea8b2ad1040b3fb73f94e4f12f (patch) | |
tree | 3ca6d7cff691daf2d6bc8d6b1ecb00971f9debf3 /Eigen/src/Core/Dot.h | |
parent | becbeda50ac17288dba0a93c6adc67b663d32a7a (diff) |
* Big change in Block and Map:
- added a MapBase base xpr on top of which Map and the specialization
of Block are implemented
- MapBase forces both aligned loads (and aligned stores, see below) in expressions
such as "x.block(...) += other_expr"
* Significant vectorization improvement:
- added an AlignedBit flag meaning the first coeff/packet is aligned;
this avoids generating extra code to deal with the first unaligned part
- removed all unaligned stores when no unrolling
- removed unaligned loads in Sum when the input has the DirectAccessBit flag
* Some code simplification in CacheFriendly product
* Some minor documentation improvements
Diffstat (limited to 'Eigen/src/Core/Dot.h')
-rw-r--r-- | Eigen/src/Core/Dot.h | 16 |
1 files changed, 10 insertions, 6 deletions
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index a5d2f0ba3..9bdff50b3 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -123,7 +123,9 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true> row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index, col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0, row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index, - col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0 + col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0, + alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned, + alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned }; typedef typename Derived1::Scalar Scalar; @@ -131,7 +133,7 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true> inline static PacketScalar run(const Derived1& v1, const Derived2& v2) { - return ei_pmul(v1.template packet<Aligned>(row1, col1), v2.template packet<Aligned>(row2, col2)); + return ei_pmul(v1.template packet<alignment1>(row1, col1), v2.template packet<alignment2>(row2, col2)); } }; @@ -175,20 +177,22 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> const int size = v1.size(); const int packetSize = ei_packet_traits<Scalar>::size; const int alignedSize = (size/packetSize)*packetSize; + const int alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned; + const int alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned; Scalar res; // do the vectorizable part of the sum if(size >= packetSize) { PacketScalar packet_res = ei_pmul( - v1.template packet<Aligned>(0), - v2.template packet<Aligned>(0) + v1.template packet<alignment1>(0), + v2.template packet<alignment2>(0) ); for(int index = packetSize; index<alignedSize; index += packetSize) { packet_res = ei_pmadd( - v1.template packet<Aligned>(index), - v2.template packet<Aligned>(index), + v1.template packet<alignment1>(index), + v2.template packet<alignment2>(index), packet_res ); } |