aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Jacobi
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2017-09-06 10:02:49 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2017-09-06 10:02:49 +0200
commitb35d1ce4a532a40e16927bb303a4c48b2bef1d7c (patch)
tree67e6a7cd1577a3d647a2ed13537180b22b218605 /Eigen/src/Jacobi
parent80142362ac35ca77bfc5ccf7ba49c9f034b57abc (diff)
Implement true compile-time "if" for apply_rotation_in_the_plane. This fixes a compilation issue for vectorized real type with missing vectorization for complexes, e.g. AVX512.
Diffstat (limited to 'Eigen/src/Jacobi')
-rw-r--r--Eigen/src/Jacobi/Jacobi.h240
1 files changed, 131 insertions, 109 deletions
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h
index 75595517a..af1228cb8 100644
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -309,61 +309,119 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR
}
namespace internal {
-template<typename VectorX, typename VectorY, typename OtherScalar>
-void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
+
+template<typename Scalar, typename OtherScalar,
+ int SizeAtCompileTime, int MinAlignment, bool Vectorizable>
+struct apply_rotation_in_the_plane_selector
{
- typedef typename VectorX::Scalar Scalar;
- enum {
- PacketSize = packet_traits<Scalar>::size,
- OtherPacketSize = packet_traits<OtherScalar>::size
- };
- typedef typename packet_traits<Scalar>::type Packet;
- typedef typename packet_traits<OtherScalar>::type OtherPacket;
- eigen_assert(xpr_x.size() == xpr_y.size());
- Index size = xpr_x.size();
- Index incrx = xpr_x.derived().innerStride();
- Index incry = xpr_y.derived().innerStride();
+ static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
+ {
+ for(Index i=0; i<size; ++i)
+ {
+ Scalar xi = *x;
+ Scalar yi = *y;
+ *x = c * xi + numext::conj(s) * yi;
+ *y = -s * xi + numext::conj(c) * yi;
+ x += incrx;
+ y += incry;
+ }
+ }
+};
- Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0);
- Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0);
-
- OtherScalar c = j.c();
- OtherScalar s = j.s();
- if (c==OtherScalar(1) && s==OtherScalar(0))
- return;
+template<typename Scalar, typename OtherScalar,
+ int SizeAtCompileTime, int MinAlignment>
+struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */>
+{
+ static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
+ {
+ enum {
+ PacketSize = packet_traits<Scalar>::size,
+ OtherPacketSize = packet_traits<OtherScalar>::size
+ };
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<OtherScalar>::type OtherPacket;
+
+ /*** dynamic-size vectorized paths ***/
+ if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1))
+ {
+ // both vectors are sequentially stored in memory => vectorization
+ enum { Peeling = 2 };
- /*** dynamic-size vectorized paths ***/
+ Index alignedStart = internal::first_default_aligned(y, size);
+ Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
- if(VectorX::SizeAtCompileTime == Dynamic &&
- (VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
- (PacketSize == OtherPacketSize) &&
- ((incrx==1 && incry==1) || PacketSize == 1))
- {
- // both vectors are sequentially stored in memory => vectorization
- enum { Peeling = 2 };
+ const OtherPacket pc = pset1<OtherPacket>(c);
+ const OtherPacket ps = pset1<OtherPacket>(s);
+ conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
+ conj_helper<OtherPacket,Packet,false,false> pm;
- Index alignedStart = internal::first_default_aligned(y, size);
- Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
+ for(Index i=0; i<alignedStart; ++i)
+ {
+ Scalar xi = x[i];
+ Scalar yi = y[i];
+ x[i] = c * xi + numext::conj(s) * yi;
+ y[i] = -s * xi + numext::conj(c) * yi;
+ }
- const OtherPacket pc = pset1<OtherPacket>(c);
- const OtherPacket ps = pset1<OtherPacket>(s);
- conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
- conj_helper<OtherPacket,Packet,false,false> pm;
+ Scalar* EIGEN_RESTRICT px = x + alignedStart;
+ Scalar* EIGEN_RESTRICT py = y + alignedStart;
- for(Index i=0; i<alignedStart; ++i)
- {
- Scalar xi = x[i];
- Scalar yi = y[i];
- x[i] = c * xi + numext::conj(s) * yi;
- y[i] = -s * xi + numext::conj(c) * yi;
- }
+ if(internal::first_default_aligned(x, size)==alignedStart)
+ {
+ for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
+ {
+ Packet xi = pload<Packet>(px);
+ Packet yi = pload<Packet>(py);
+ pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+ pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+ px += PacketSize;
+ py += PacketSize;
+ }
+ }
+ else
+ {
+ Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
+ for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
+ {
+ Packet xi = ploadu<Packet>(px);
+ Packet xi1 = ploadu<Packet>(px+PacketSize);
+ Packet yi = pload <Packet>(py);
+ Packet yi1 = pload <Packet>(py+PacketSize);
+ pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+ pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
+ pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+ pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
+ px += Peeling*PacketSize;
+ py += Peeling*PacketSize;
+ }
+ if(alignedEnd!=peelingEnd)
+ {
+ Packet xi = ploadu<Packet>(x+peelingEnd);
+ Packet yi = pload <Packet>(y+peelingEnd);
+ pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+ pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+ }
+ }
- Scalar* EIGEN_RESTRICT px = x + alignedStart;
- Scalar* EIGEN_RESTRICT py = y + alignedStart;
+ for(Index i=alignedEnd; i<size; ++i)
+ {
+ Scalar xi = x[i];
+ Scalar yi = y[i];
+ x[i] = c * xi + numext::conj(s) * yi;
+ y[i] = -s * xi + numext::conj(c) * yi;
+ }
+ }
- if(internal::first_default_aligned(x, size)==alignedStart)
+ /*** fixed-size vectorized path ***/
+ else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment
{
- for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
+ const OtherPacket pc = pset1<OtherPacket>(c);
+ const OtherPacket ps = pset1<OtherPacket>(s);
+ conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
+ conj_helper<OtherPacket,Packet,false,false> pm;
+ Scalar* EIGEN_RESTRICT px = x;
+ Scalar* EIGEN_RESTRICT py = y;
+ for(Index i=0; i<size; i+=PacketSize)
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
@@ -373,76 +431,40 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
py += PacketSize;
}
}
- else
- {
- Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
- for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
- {
- Packet xi = ploadu<Packet>(px);
- Packet xi1 = ploadu<Packet>(px+PacketSize);
- Packet yi = pload <Packet>(py);
- Packet yi1 = pload <Packet>(py+PacketSize);
- pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
- pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
- pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
- pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
- px += Peeling*PacketSize;
- py += Peeling*PacketSize;
- }
- if(alignedEnd!=peelingEnd)
- {
- Packet xi = ploadu<Packet>(x+peelingEnd);
- Packet yi = pload <Packet>(y+peelingEnd);
- pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
- pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
- }
- }
- for(Index i=alignedEnd; i<size; ++i)
+ /*** non-vectorized path ***/
+ else
{
- Scalar xi = x[i];
- Scalar yi = y[i];
- x[i] = c * xi + numext::conj(s) * yi;
- y[i] = -s * xi + numext::conj(c) * yi;
+ apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s);
}
}
+};
- /*** fixed-size vectorized path ***/
- else if(VectorX::SizeAtCompileTime != Dynamic &&
- (VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
- (PacketSize == OtherPacketSize) &&
- (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
- {
- const OtherPacket pc = pset1<OtherPacket>(c);
- const OtherPacket ps = pset1<OtherPacket>(s);
- conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
- conj_helper<OtherPacket,Packet,false,false> pm;
- Scalar* EIGEN_RESTRICT px = x;
- Scalar* EIGEN_RESTRICT py = y;
- for(Index i=0; i<size; i+=PacketSize)
- {
- Packet xi = pload<Packet>(px);
- Packet yi = pload<Packet>(py);
- pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
- pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
- px += PacketSize;
- py += PacketSize;
- }
- }
+template<typename VectorX, typename VectorY, typename OtherScalar>
+void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
+{
+ typedef typename VectorX::Scalar Scalar;
+ const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit)
+ && (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size));
- /*** non-vectorized path ***/
- else
- {
- for(Index i=0; i<size; ++i)
- {
- Scalar xi = *x;
- Scalar yi = *y;
- *x = c * xi + numext::conj(s) * yi;
- *y = -s * xi + numext::conj(c) * yi;
- x += incrx;
- y += incry;
- }
- }
+ eigen_assert(xpr_x.size() == xpr_y.size());
+ Index size = xpr_x.size();
+ Index incrx = xpr_x.derived().innerStride();
+ Index incry = xpr_y.derived().innerStride();
+
+ Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0);
+ Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0);
+
+ OtherScalar c = j.c();
+ OtherScalar s = j.s();
+ if (c==OtherScalar(1) && s==OtherScalar(0))
+ return;
+
+ apply_rotation_in_the_plane_selector<
+ Scalar,OtherScalar,
+ VectorX::SizeAtCompileTime,
+ EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment),
+ Vectorizable>::run(x,incrx,y,incry,size,c,s);
}
} // end namespace internal