From 8c34b5a0e3d45e34bf674baf5cd384b01943423b Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Sun, 19 Jun 2016 16:13:17 -0300 Subject: mostly cleanups and modernizing code --- Eigen/src/Core/arch/AltiVec/Complex.h | 158 ++++++++++++++++++++-------------- 1 file changed, 91 insertions(+), 67 deletions(-) (limited to 'Eigen/src/Core/arch') diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 7631fec91..9feef7782 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -42,6 +42,7 @@ template<> struct packet_traits > : default_packet_traits Vectorizable = 1, AlignedOnScalar = 1, size = 2, + HasHalfPacket = 0, HasAdd = 1, HasSub = 1, @@ -52,7 +53,8 @@ template<> struct packet_traits > : default_packet_traits HasAbs2 = 0, HasMin = 0, HasMax = 0, - HasSetLinear = 0 + HasSetLinear = 0, + HasBlend = 1 }; }; @@ -61,7 +63,6 @@ template<> struct unpacket_traits { typedef std::complex type; template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) { Packet2cf res; - /* On AltiVec we cannot load 64-bit registers, so wa have to take care of alignment */ if((ptrdiff_t(&from) % 16) == 0) res.v = pload((const float *)&from); else @@ -70,26 +71,32 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex EIGEN_STRONG_INLINE Packet2cf pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) { return pset1(*from); } + +template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } + template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, Index stride) { std::complex EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; - return Packet2cf(vec_ld(0, (const float*)af)); + return pload(af); } template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, Index stride) { std::complex EIGEN_ALIGN16 af[2]; - vec_st(from.v, 0, (float*)af); + pstore >((std::complex *) af, from); to[0*stride] = af[0]; to[1*stride] = af[1]; } - -template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd(a.v, b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub(a.v, b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); } +template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor(a.v, reinterpret_cast(p4ui_CONJ_XOR))); } template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { @@ -103,30 +110,19 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, con v1 = vec_madd(v1, b.v, p4f_ZERO); // multiply a_im * b and get the conjugate result v2 = vec_madd(v2, b.v, p4f_ZERO); - v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR); + v2 = reinterpret_cast(pxor(v2, reinterpret_cast(p4ui_CONJ_XOR))); // permute back to a proper order v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV); return Packet2cf(vec_add(v1, v2)); } -template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); } - -template<> EIGEN_STRONG_INLINE Packet2cf pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } -template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } - -template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) -{ - return pset1(*from); -} - -template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } -template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } +template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand(a.v, b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por(a.v, b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor(a.v, b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot(a.v, b.v)); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_PPC_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) { @@ -146,23 +142,23 @@ template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) template<> EIGEN_STRONG_INLINE std::complex predux(const Packet2cf& a) { Packet4f b; - b = (Packet4f) vec_sld(a.v, a.v, 8); - b = padd(a.v, b); - return pfirst(Packet2cf(b)); + b = vec_sld(a.v, a.v, 8); + b = padd(a.v, b); + return pfirst(Packet2cf(b)); } template<> EIGEN_STRONG_INLINE Packet2cf preduxp(const Packet2cf* vecs) { Packet4f b1, b2; #ifdef _BIG_ENDIAN - b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); - b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); + b1 = vec_sld(vecs[0].v, vecs[1].v, 8); + b2 = vec_sld(vecs[1].v, vecs[0].v, 8); #else - b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); - b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); + b1 = vec_sld(vecs[1].v, vecs[0].v, 8); + b2 = vec_sld(vecs[0].v, vecs[1].v, 8); #endif - b2 = (Packet4f) vec_sld(b2, b2, 8); - b2 = padd(b1, b2); + b2 = vec_sld(b2, b2, 8); + b2 = padd(b1, b2); return Packet2cf(b2); } @@ -171,10 +167,10 @@ template<> EIGEN_STRONG_INLINE std::complex predux_mul(const P { Packet4f b; Packet2cf prod; - b = (Packet4f) vec_sld(a.v, a.v, 8); - prod = pmul(a, Packet2cf(b)); + b = vec_sld(a.v, a.v, 8); + prod = pmul(a, Packet2cf(b)); - return pfirst(prod); + return pfirst(prod); } template @@ -226,12 +222,30 @@ template<> struct conj_helper } }; +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const + { return Packet2cf(internal::pmul(x, y.v)); } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const + { return Packet2cf(internal::pmul(x.v, y)); } +}; + template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for AltiVec - Packet2cf res = conj_helper().pmul(a,b); - Packet4f s = vec_madd(b.v, b.v, p4f_ZERO); - return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV)))); + Packet2cf res = conj_helper().pmul(a, b); + Packet4f s = pmul(b.v, b.v); + return Packet2cf(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_COMPLEX32_REV)))); } template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x) @@ -246,6 +260,12 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) kernel.packet[0].v = tmp; } +template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) { + Packet2cf result; + result.v = reinterpret_cast(pblend(ifPacket, reinterpret_cast(thenPacket.v), reinterpret_cast(elsePacket.v))); + return result; +} + //---------- double ---------- #ifdef __VSX__ struct Packet1cd @@ -303,10 +323,10 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet1c to[1*stride] = af[1]; } -template<> EIGEN_STRONG_INLINE Packet1cd padd(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_add(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd psub(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_sub(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd padd(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd psub(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } -template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); } +template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast(p2ul_CONJ_XOR2))); } template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { @@ -320,23 +340,20 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, con v1 = vec_madd(a_re, b.v, p2d_ZERO); // multiply a_im * b and get the conjugate result v2 = vec_madd(a_im, b.v, p2d_ZERO); - v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8); - v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1); + v2 = reinterpret_cast(vec_sld(reinterpret_cast(v2), reinterpret_cast(v2), 8)); + v2 = pxor(v2, reinterpret_cast(p2ul_CONJ_XOR1)); return Packet1cd(vec_add(v1, v2)); } -template<> EIGEN_STRONG_INLINE Packet1cd pand (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd por (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd pxor (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd pandnot(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); } +template<> EIGEN_STRONG_INLINE Packet1cd pand (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd por (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pxor (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pandnot(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); } -template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* from) -{ - return pset1(*from); -} +template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* from) { return pset1(*from); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { vec_dstt((long *)addr, DST_CTRL(2,2,32), DST_CHAN); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_PPC_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { @@ -348,20 +365,10 @@ template<> EIGEN_STRONG_INLINE std::complex pfirst(const Pac template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } -template<> EIGEN_STRONG_INLINE std::complex predux(const Packet1cd& a) -{ - return pfirst(a); -} - -template<> EIGEN_STRONG_INLINE Packet1cd preduxp(const Packet1cd* vecs) -{ - return vecs[0]; -} +template<> EIGEN_STRONG_INLINE std::complex predux(const Packet1cd& a) { return pfirst(a); } +template<> EIGEN_STRONG_INLINE Packet1cd preduxp(const Packet1cd* vecs) { return vecs[0]; } -template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& a) -{ - return pfirst(a); -} +template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& a) { return pfirst(a); } template struct palign_impl @@ -405,6 +412,23 @@ template<> struct conj_helper return pconj(internal::pmul(a, b)); } }; +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const + { return Packet1cd(internal::pmul(x, y.v)); } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const + { return Packet1cd(internal::pmul(x.v, y)); } +}; template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { -- cgit v1.2.3