diff options
author | Benoit Jacob <jacob.benoit.1@gmail.com> | 2010-10-12 09:58:53 -0400 |
---|---|---|
committer | Benoit Jacob <jacob.benoit.1@gmail.com> | 2010-10-12 09:58:53 -0400 |
commit | dbedc700123381bc478ae45897a4bb90868781dc (patch) | |
tree | 551161f6429d0ed7a37c30b2e089a72dce8f45ef /Eigen/src/Jacobi | |
parent | 12a152031d46b6b73c27e7876fef81eae10aafc2 (diff) |
Jacobi improvements:
* add fixed-size vectorized path
* add missing restrict keywords
* use innerStride()
* allow vectorization even if innerStride()>1, if PacketSize==1
(think of the case of rows of std::complex<double>)
Diffstat (limited to 'Eigen/src/Jacobi')
-rw-r--r-- | Eigen/src/Jacobi/Jacobi.h | 42 |
1 files changed, 35 insertions, 7 deletions
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h index 01f4e6e8f..6fd1ed389 100644 --- a/Eigen/src/Jacobi/Jacobi.h +++ b/Eigen/src/Jacobi/Jacobi.h @@ -305,19 +305,24 @@ void /*EIGEN_DONT_INLINE*/ ei_apply_rotation_in_the_plane(VectorX& _x, VectorY& { typedef typename VectorX::Index Index; typedef typename VectorX::Scalar Scalar; + enum { PacketSize = ei_packet_traits<Scalar>::size }; + typedef typename ei_packet_traits<Scalar>::type Packet; ei_assert(_x.size() == _y.size()); Index size = _x.size(); - Index incrx = size ==1 ? 1 : &_x.coeffRef(1) - &_x.coeffRef(0); - Index incry = size ==1 ? 1 : &_y.coeffRef(1) - &_y.coeffRef(0); + Index incrx = _x.innerStride(); + Index incry = _y.innerStride(); Scalar* EIGEN_RESTRICT x = &_x.coeffRef(0); Scalar* EIGEN_RESTRICT y = &_y.coeffRef(0); - if((VectorX::Flags & VectorY::Flags & PacketAccessBit) && incrx==1 && incry==1) + /*** dynamic-size vectorized paths ***/ + + if(VectorX::SizeAtCompileTime == Dynamic && + (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + ((incrx==1 && incry==1) || PacketSize == 1)) { // both vectors are sequentially stored in memory => vectorization - typedef typename ei_packet_traits<Scalar>::type Packet; - enum { PacketSize = ei_packet_traits<Scalar>::size, Peeling = 2 }; + enum { Peeling = 2 }; Index alignedStart = ei_first_aligned(y, size); Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; @@ -334,8 +339,8 @@ void /*EIGEN_DONT_INLINE*/ ei_apply_rotation_in_the_plane(VectorX& _x, VectorY& y[i] = -j.s() * xi + ei_conj(j.c()) * yi; } - Scalar* px = x + alignedStart; - Scalar* py = y + alignedStart; + Scalar* EIGEN_RESTRICT px = x + alignedStart; + Scalar* EIGEN_RESTRICT py = y + alignedStart; if(ei_first_aligned(x, size)==alignedStart) { @@ -382,6 +387,29 @@ void /*EIGEN_DONT_INLINE*/ ei_apply_rotation_in_the_plane(VectorX& _x, VectorY& y[i] = -j.s() * xi + ei_conj(j.c()) * yi; } } + + /*** fixed-size vectorized path ***/ + else if(VectorX::SizeAtCompileTime != Dynamic && + (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (VectorX::Flags & VectorY::Flags & AlignedBit)) + { + const Packet pc = ei_pset1<Packet>(j.c()); + const Packet ps = ei_pset1<Packet>(j.s()); + ei_conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; + Scalar* EIGEN_RESTRICT px = x; + Scalar* EIGEN_RESTRICT py = y; + for(Index i=0; i<size; i+=PacketSize) + { + Packet xi = ei_pload<Packet>(px); + Packet yi = ei_pload<Packet>(py); + ei_pstore(px, ei_padd(ei_pmul(pc,xi),pcj.pmul(ps,yi))); + ei_pstore(py, ei_psub(pcj.pmul(pc,yi),ei_pmul(ps,xi))); + px += PacketSize; + py += PacketSize; + } + } + + /*** non-vectorized path ***/ else { for(Index i=0; i<size; ++i) |