aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Jacobi
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2017-04-14 11:05:13 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2017-04-14 11:05:13 +0200
commitd9084ac8e142697f0d767092a17ffc3a7a18a2e4 (patch)
tree054819b5eba9e7aa5c05c45f49fc2f481b9d7cbc /Eigen/src/Jacobi
parentf75dfdda7e6f133c4d45671efb6799dbb975b1ef (diff)
Improve mixing of complex and real in the vectorized path of apply_rotation_in_the_plane
Diffstat (limited to 'Eigen/src/Jacobi')
-rw-r--r--Eigen/src/Jacobi/Jacobi.h42
1 files changed, 25 insertions, 17 deletions
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h
index d25af8e90..c30326e1d 100644
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -302,8 +302,12 @@ template<typename VectorX, typename VectorY, typename OtherScalar>
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
{
typedef typename VectorX::Scalar Scalar;
- enum { PacketSize = packet_traits<Scalar>::size };
+ enum {
+ PacketSize = packet_traits<Scalar>::size,
+ OtherPacketSize = packet_traits<OtherScalar>::size
+ };
typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<OtherScalar>::type OtherPacket;
eigen_assert(xpr_x.size() == xpr_y.size());
Index size = xpr_x.size();
Index incrx = xpr_x.derived().innerStride();
@@ -321,6 +325,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
if(VectorX::SizeAtCompileTime == Dynamic &&
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
+ (PacketSize == OtherPacketSize) &&
((incrx==1 && incry==1) || PacketSize == 1))
{
// both vectors are sequentially stored in memory => vectorization
@@ -329,9 +334,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
Index alignedStart = internal::first_default_aligned(y, size);
Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
- const Packet pc = pset1<Packet>(c);
- const Packet ps = pset1<Packet>(s);
- conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj;
+ const OtherPacket pc = pset1<OtherPacket>(c);
+ const OtherPacket ps = pset1<OtherPacket>(s);
+ conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
+ conj_helper<OtherPacket,Packet,false,false> pm;
for(Index i=0; i<alignedStart; ++i)
{
@@ -350,8 +356,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
- pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
- pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
+ pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+ pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
px += PacketSize;
py += PacketSize;
}
@@ -365,10 +371,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
Packet xi1 = ploadu<Packet>(px+PacketSize);
Packet yi = pload <Packet>(py);
Packet yi1 = pload <Packet>(py+PacketSize);
- pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
- pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1)));
- pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
- pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1)));
+ pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+ pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
+ pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+ pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
px += Peeling*PacketSize;
py += Peeling*PacketSize;
}
@@ -376,8 +382,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
{
Packet xi = ploadu<Packet>(x+peelingEnd);
Packet yi = pload <Packet>(y+peelingEnd);
- pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
- pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
+ pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+ pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
}
}
@@ -393,19 +399,21 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
/*** fixed-size vectorized path ***/
else if(VectorX::SizeAtCompileTime != Dynamic &&
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
+ (PacketSize == OtherPacketSize) &&
(EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
{
- const Packet pc = pset1<Packet>(c);
- const Packet ps = pset1<Packet>(s);
- conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj;
+ const OtherPacket pc = pset1<OtherPacket>(c);
+ const OtherPacket ps = pset1<OtherPacket>(s);
+ conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
+ conj_helper<OtherPacket,Packet,false,false> pm;
Scalar* EIGEN_RESTRICT px = x;
Scalar* EIGEN_RESTRICT py = y;
for(Index i=0; i<size; i+=PacketSize)
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
- pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
- pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
+ pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+ pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
px += PacketSize;
py += PacketSize;
}