diff options
author | 2014-04-24 05:47:53 -0700 | |
---|---|---|
committer | 2014-04-24 05:47:53 -0700 | |
commit | 8d85ce88e129d794d0700dd2c8eec2713449e54d (patch) | |
tree | f4b4402e965c20f50e494e5f39ee37ae9f612459 /Eigen/src/Core/arch | |
parent | 4eb92e564710cd55323302d31e47aff74d770426 (diff) |
Implement ptranspose on altivec and fix pgather/pscatter
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/Complex.h | 15 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 27 |
2 files changed, 39 insertions, 3 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index ec11cfaa0..ee1f008b1 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -21,6 +21,8 @@ static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8); static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; +static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; +static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; //---------- float ---------- struct Packet2cf @@ -52,7 +54,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; }; +template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) { @@ -71,12 +73,12 @@ template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packe std::complex<float> EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; - return Packet2cf(vec_ld(0, af)); + return Packet2cf(vec_ld(0, (const float*)af)); } template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride) { std::complex<float> EIGEN_ALIGN16 af[2]; - vec_st(from.v, 0, af); + vec_st(from.v, 0, (float*)af); to[0*stride] = af[0]; to[1*stride] = af[1]; } @@ -227,6 +229,13 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV)); } +template<> EIGEN_STRONG_INLINE void ptranspose(Kernel<Packet2cf>& kernel) +{ + Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0); + kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1); + kernel.packet[0].v = tmp; +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 80a99a004..618d95d85 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -146,6 +146,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v) return s; } */ + template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html float EIGEN_ALIGN16 af[4]; @@ -533,6 +534,32 @@ struct palign_impl<Offset,Packet4i> } }; +template<> EIGEN_DEVICE_FUNC inline void +ptranspose(Kernel<Packet4f>& kernel) { + Packet4f t0, t1, t2, t3; + t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]); + t1 = vec_mergel(kernel.packet[0], kernel.packet[2]); + t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]); + t3 = vec_mergel(kernel.packet[1], kernel.packet[3]); + kernel.packet[0] = vec_mergeh(t0, t2); + kernel.packet[1] = vec_mergel(t0, t2); + kernel.packet[2] = vec_mergeh(t1, t3); + kernel.packet[3] = vec_mergel(t1, t3); +} + +template<> EIGEN_DEVICE_FUNC inline void +ptranspose(Kernel<Packet4i>& kernel) { + Packet4i t0, t1, t2, t3; + t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]); + t1 = vec_mergel(kernel.packet[0], kernel.packet[2]); + t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]); + t3 = vec_mergel(kernel.packet[1], kernel.packet[3]); + kernel.packet[0] = vec_mergeh(t0, t2); + kernel.packet[1] = vec_mergel(t0, t2); + kernel.packet[2] = vec_mergeh(t1, t3); + kernel.packet[3] = vec_mergel(t1, t3); +} + } // end namespace internal } // end namespace Eigen |