aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2014-04-24 05:47:53 -0700
committerGravatar Gael Guennebaud <g.gael@free.fr>2014-04-24 05:47:53 -0700
commit8d85ce88e129d794d0700dd2c8eec2713449e54d (patch)
treef4b4402e965c20f50e494e5f39ee37ae9f612459 /Eigen/src
parent4eb92e564710cd55323302d31e47aff74d770426 (diff)
Implement ptranspose on altivec and fix pgather/pscatter
Diffstat (limited to 'Eigen/src')
-rw-r--r--Eigen/src/Core/arch/AltiVec/Complex.h15
-rwxr-xr-xEigen/src/Core/arch/AltiVec/PacketMath.h27
2 files changed, 39 insertions, 3 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index ec11cfaa0..ee1f008b1 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -21,6 +21,8 @@ static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);
static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
+static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
+static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
//---------- float ----------
struct Packet2cf
@@ -52,7 +54,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -71,12 +73,12 @@ template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packe
std::complex<float> EIGEN_ALIGN16 af[2];
af[0] = from[0*stride];
af[1] = from[1*stride];
- return Packet2cf(vec_ld(0, af));
+ return Packet2cf(vec_ld(0, (const float*)af));
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride)
{
std::complex<float> EIGEN_ALIGN16 af[2];
- vec_st(from.v, 0, af);
+ vec_st(from.v, 0, (float*)af);
to[0*stride] = af[0];
to[1*stride] = af[1];
}
@@ -227,6 +229,13 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x
return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
}
+template<> EIGEN_STRONG_INLINE void ptranspose(Kernel<Packet2cf>& kernel)
+{
+ Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1);
+ kernel.packet[0].v = tmp;
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 80a99a004..618d95d85 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -146,6 +146,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
return s;
}
*/
+
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
float EIGEN_ALIGN16 af[4];
@@ -533,6 +534,32 @@ struct palign_impl<Offset,Packet4i>
}
};
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(Kernel<Packet4f>& kernel) {
+ Packet4f t0, t1, t2, t3;
+ t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+ t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+ t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+ t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+ kernel.packet[0] = vec_mergeh(t0, t2);
+ kernel.packet[1] = vec_mergel(t0, t2);
+ kernel.packet[2] = vec_mergeh(t1, t3);
+ kernel.packet[3] = vec_mergel(t1, t3);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(Kernel<Packet4i>& kernel) {
+ Packet4i t0, t1, t2, t3;
+ t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+ t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+ t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+ t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+ kernel.packet[0] = vec_mergeh(t0, t2);
+ kernel.packet[1] = vec_mergel(t0, t2);
+ kernel.packet[2] = vec_mergeh(t1, t3);
+ kernel.packet[3] = vec_mergel(t1, t3);
+}
+
} // end namespace internal
} // end namespace Eigen