// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2010 Gael Guennebaud // Copyright (C) 2016 Konstantinos Margaritis // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_COMPLEX32_ALTIVEC_H #define EIGEN_COMPLEX32_ALTIVEC_H namespace Eigen { namespace internal { #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) static Packet4ui p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; //vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO); #endif static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; struct Packet1cd { EIGEN_STRONG_INLINE Packet1cd() {} EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {} Packet2d v; }; struct Packet2cf { EIGEN_STRONG_INLINE Packet2cf() {} EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12) union { Packet4f v; Packet1cd cd[2]; }; #else Packet4f v; #endif }; template<> struct packet_traits > : default_packet_traits { typedef Packet2cf type; typedef Packet2cf half; enum { Vectorizable = 1, AlignedOnScalar = 1, size = 2, HasHalfPacket = 0, HasAdd = 1, HasSub = 1, HasMul = 1, HasDiv = 1, HasNegate = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, HasMax = 0, HasBlend = 1, HasSetLinear = 0 }; }; template<> struct packet_traits > : default_packet_traits { typedef Packet1cd type; typedef Packet1cd half; enum { Vectorizable = 1, AlignedOnScalar = 1, size = 1, HasHalfPacket = 0, HasAdd = 1, HasSub = 1, HasMul = 1, HasDiv = 1, HasNegate = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, HasMax = 0, HasSetLinear = 0 }; }; template<> struct unpacket_traits { typedef std::complex type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; }; template<> struct unpacket_traits { typedef std::complex type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; }; /* Forward declaration */ EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel); /* complex first */ template<> EIGEN_STRONG_INLINE Packet1cd pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu((const double*)from)); } template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } template<> EIGEN_STRONG_INLINE Packet1cd pset1(const std::complex& from) { /* here we really have to use unaligned loads :( */ return ploadu(&from); } template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather, Packet1cd>(const std::complex* from, Index stride EIGEN_UNUSED) { return pload(from); } template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet1cd>(std::complex* to, const Packet1cd& from, Index stride EIGEN_UNUSED) { pstore >(to, from); } template<> EIGEN_STRONG_INLINE Packet1cd padd(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); } template<> EIGEN_STRONG_INLINE Packet1cd psub(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); } template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); } template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { Packet2d a_re, a_im, v1, v2; // Permute and multiply the real parts of a and b a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI); // Get the imaginary parts of a a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO); // multiply a_re * b v1 = vec_madd(a_re, b.v, p2d_ZERO); // multiply a_im * b and get the conjugate result v2 = vec_madd(a_im, b.v, p2d_ZERO); v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8); v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1); return Packet1cd(v1 + v2); } template<> EIGEN_STRONG_INLINE Packet1cd pand (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd por (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pxor (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pandnot (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); } template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* from) { return pset1(*from); } template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) { Packet2d eq = vec_cmpeq (a.v, b.v); Packet2d tmp = { eq[1], eq[0] }; return (Packet1cd)pand(eq, tmp); } template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_ZVECTOR_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { std::complex EIGEN_ALIGN16 res; pstore >(&res, a); return res; } template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } template<> EIGEN_STRONG_INLINE std::complex predux(const Packet1cd& a) { return pfirst(a); } template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& a) { return pfirst(a); } EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for AltiVec Packet1cd res = pmul(a,pconj(b)); Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_); return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64))); } EIGEN_STRONG_INLINE Packet1cd pcplxflip/**/(const Packet1cd& x) { return Packet1cd(preverse(Packet2d(x.v))); } EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); kernel.packet[0].v = tmp; } /* complex follows */ template<> EIGEN_STRONG_INLINE Packet2cf pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) { std::complex EIGEN_ALIGN16 res[2]; pstore >(res, a); return res[0]; } #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12) template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) { Packet2cf res; res.cd[0] = Packet1cd(vec_ld2f((const float *)&from)); res.cd[1] = res.cd[0]; return res; } #else template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) { Packet2cf res; if((std::ptrdiff_t(&from) % 16) == 0) res.v = pload((const float *)&from); else res.v = ploadu((const float *)&from); res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI); return res; } #endif template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, Index stride) { std::complex EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return pload(af); } template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, Index stride) { std::complex EIGEN_ALIGN16 af[2]; pstore >((std::complex *) af, from); to[0*stride] = af[0]; to[1*stride] = af[1]; } template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd(a.v, b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub(a.v, b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); } template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) { return pset1(*from); } template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_ZVECTOR_PREFETCH(addr); } #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12) template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) { Packet4f eq = pcmp_eq (a.v, b.v); Packet2cf res; Packet2d tmp1 = { eq.v4f[0][1], eq.v4f[0][0] }; Packet2d tmp2 = { eq.v4f[1][1], eq.v4f[1][0] }; res.v.v4f[0] = pand(eq.v4f[0], tmp1); res.v.v4f[1] = pand(eq.v4f[1], tmp2); return res; } template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { Packet2cf res; res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast(a.v.v4f[0]))).v; res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast(a.v.v4f[1]))).v; return res; } template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { Packet2cf res; res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast(a.v.v4f[0])), Packet1cd(reinterpret_cast(b.v.v4f[0]))).v; res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast(a.v.v4f[1])), Packet1cd(reinterpret_cast(b.v.v4f[1]))).v; return res; } template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { Packet2cf res; res.cd[0] = a.cd[1]; res.cd[1] = a.cd[0]; return res; } template<> EIGEN_STRONG_INLINE std::complex predux(const Packet2cf& a) { std::complex res; Packet1cd b = padd(a.cd[0], a.cd[1]); vec_st2f(b.v, (float*)&res); return res; } template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet2cf& a) { std::complex res; Packet1cd b = pmul(a.cd[0], a.cd[1]); vec_st2f(b.v, (float*)&res); return res; } EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for AltiVec Packet2cf res; res.cd[0] = pdiv(a.cd[0], b.cd[0]); res.cd[1] = pdiv(a.cd[1], b.cd[1]); return res; } EIGEN_STRONG_INLINE Packet2cf pcplxflip/**/(const Packet2cf& x) { Packet2cf res; res.cd[0] = pcplxflip(x.cd[0]); res.cd[1] = pcplxflip(x.cd[1]); return res; } EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { Packet1cd tmp = kernel.packet[0].cd[1]; kernel.packet[0].cd[1] = kernel.packet[1].cd[0]; kernel.packet[1].cd[0] = tmp; } template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) { Packet2cf result; const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] }; result.v = pblend(ifPacket4, thenPacket.v, elsePacket.v); return result; } #else template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) { Packet4f eq = vec_cmpeq (a.v, b.v); Packet4f tmp = { eq[1], eq[0], eq[3], eq[2] }; return (Packet2cf)pand(eq, tmp); } template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor(a.v, reinterpret_cast(p4ui_CONJ_XOR))); } template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { Packet4f a_re, a_im, prod, prod_im; // Permute and multiply the real parts of a and b a_re = vec_perm(a.v, a.v, p16uc_PSET32_WODD); // Get the imaginary parts of a a_im = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN); // multiply a_im * b and get the conjugate result prod_im = a_im * b.v; prod_im = pxor(prod_im, reinterpret_cast(p4ui_CONJ_XOR)); // permute back to a proper order prod_im = vec_perm(prod_im, prod_im, p16uc_COMPLEX32_REV); // multiply a_re * b, add prod_im prod = pmadd(a_re, b.v, prod_im); return Packet2cf(prod); } template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { Packet4f rev_a; rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2); return Packet2cf(rev_a); } template<> EIGEN_STRONG_INLINE std::complex predux(const Packet2cf& a) { Packet4f b; b = vec_sld(a.v, a.v, 8); b = padd(a.v, b); return pfirst(Packet2cf(b)); } template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet2cf& a) { Packet4f b; Packet2cf prod; b = vec_sld(a.v, a.v, 8); prod = pmul(a, Packet2cf(b)); return pfirst(prod); } EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for AltiVec Packet2cf res = pmul(a, pconj(b)); Packet4f s = pmul(b.v, b.v); return Packet2cf(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_COMPLEX32_REV)))); } template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x) { return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV)); } EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); kernel.packet[0].v = tmp; } template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) { Packet2cf result; result.v = reinterpret_cast(pblend(ifPacket, reinterpret_cast(thenPacket.v), reinterpret_cast(elsePacket.v))); return result; } #endif } // end namespace internal } // end namespace Eigen #endif // EIGEN_COMPLEX32_ALTIVEC_H