// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2010 Gael Guennebaud // Copyright (C) 2010-2016 Konstantinos Margaritis // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_COMPLEX32_ALTIVEC_H #define EIGEN_COMPLEX32_ALTIVEC_H namespace Eigen { namespace internal { static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; #ifdef __VSX__ #if defined(_BIG_ENDIAN) static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; #else static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; #endif #endif //---------- float ---------- struct Packet2cf { EIGEN_STRONG_INLINE explicit Packet2cf() {} EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { Packet4f v1, v2; // Permute and multiply the real parts of a and b v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD); // Get the imaginary parts of a v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN); // multiply a_re * b v1 = vec_madd(v1, b.v, p4f_ZERO); // multiply a_im * b and get the conjugate result v2 = vec_madd(v2, b.v, p4f_ZERO); v2 = reinterpret_cast(pxor(v2, reinterpret_cast(p4ui_CONJ_XOR))); // permute back to a proper order v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV); return Packet2cf(padd(v1, v2)); } EIGEN_STRONG_INLINE Packet2cf& operator*=(const Packet2cf& b) { v = pmul(Packet2cf(*this), b).v; return *this; } EIGEN_STRONG_INLINE Packet2cf operator*(const Packet2cf& b) const { return Packet2cf(*this) *= b; } EIGEN_STRONG_INLINE Packet2cf& operator+=(const Packet2cf& b) { v = padd(v, b.v); return *this; } EIGEN_STRONG_INLINE Packet2cf operator+(const Packet2cf& b) const { return Packet2cf(*this) += b; } EIGEN_STRONG_INLINE Packet2cf& operator-=(const Packet2cf& b) { v = psub(v, b.v); return *this; } EIGEN_STRONG_INLINE Packet2cf operator-(const Packet2cf& b) const { return Packet2cf(*this) -= b; } EIGEN_STRONG_INLINE Packet2cf operator-(void) const { return Packet2cf(vec_neg(v)); } Packet4f v; }; template<> struct packet_traits > : default_packet_traits { typedef Packet2cf type; typedef Packet2cf half; typedef Packet4f as_real; enum { Vectorizable = 1, AlignedOnScalar = 1, size = 2, HasHalfPacket = 0, HasAdd = 1, HasSub = 1, HasMul = 1, HasDiv = 1, HasNegate = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, HasMax = 0, #ifdef __VSX__ HasBlend = 1, #endif HasSetLinear = 0 }; }; template<> struct unpacket_traits { typedef std::complex type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; typedef Packet4f as_real; }; template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) { Packet2cf res; if((std::ptrdiff_t(&from) % 16) == 0) res.v = pload((const float *)&from); else res.v = ploadu((const float *)&from); res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI); return res; } template<> EIGEN_STRONG_INLINE Packet2cf pload(const std::complex* from) { return Packet2cf(pload((const float *) from)); } template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { return Packet2cf(ploadu((const float*) from)); } template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) { return pset1(*from); } template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { pstore((float*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { pstoreu((float*)to, from.v); } EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex* from0, const std::complex* from1) { Packet4f res0, res1; #ifdef __VSX__ __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (*from0)); __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (*from1)); #ifdef _BIG_ENDIAN __asm__ ("xxpermdi %x0, %x1, %x2, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1)); #else __asm__ ("xxpermdi %x0, %x2, %x1, 0" : "=wa" (res0) : "wa" (res0), "wa" (res1)); #endif #else *reinterpret_cast *>(&res0) = *from0; *reinterpret_cast *>(&res1) = *from1; res0 = vec_perm(res0, res1, p16uc_TRANSPOSE64_HI); #endif return Packet2cf(res0); } template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather, Packet2cf>(const std::complex* from, Index stride) { EIGEN_ALIGN16 std::complex af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return pload(af); } template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::complex* to, const Packet2cf& from, Index stride) { EIGEN_ALIGN16 std::complex af[2]; pstore >((std::complex *) af, from); to[0*stride] = af[0]; to[1*stride] = af[1]; } template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); } template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); } template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor(a.v, reinterpret_cast(p4ui_CONJ_XOR))); } template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand(a.v, b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por(a.v, b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor(a.v, b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot(a.v, b.v)); } template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_PPC_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) { EIGEN_ALIGN16 std::complex res[2]; pstore((float *)&res, a.v); return res[0]; } template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { Packet4f rev_a; rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2); return Packet2cf(rev_a); } template<> EIGEN_STRONG_INLINE std::complex predux(const Packet2cf& a) { Packet4f b; b = vec_sld(a.v, a.v, 8); b = padd(a.v, b); return pfirst(Packet2cf(b)); } template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet2cf& a) { Packet4f b; Packet2cf prod; b = vec_sld(a.v, a.v, 8); prod = pmul(a, Packet2cf(b)); return pfirst(prod); } EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for AltiVec Packet2cf res = pmul(a, pconj(b)); Packet4f s = pmul(b.v, b.v); return Packet2cf(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_COMPLEX32_REV)))); } template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x) { return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV)); } EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); kernel.packet[0].v = tmp; } template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) { Packet4f eq = reinterpret_cast(vec_cmpeq(a.v,b.v)); return Packet2cf(vec_and(eq, vec_perm(eq, eq, p16uc_COMPLEX32_REV))); } #ifdef __VSX__ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) { Packet2cf result; result.v = reinterpret_cast(pblend(ifPacket, reinterpret_cast(thenPacket.v), reinterpret_cast(elsePacket.v))); return result; } #endif template<> EIGEN_STRONG_INLINE Packet2cf psqrt(const Packet2cf& a) { return psqrt_complex(a); } //---------- double ---------- #ifdef __VSX__ struct Packet1cd { EIGEN_STRONG_INLINE Packet1cd() {} EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {} EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { Packet2d a_re, a_im, v1, v2; // Permute and multiply the real parts of a and b a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI); // Get the imaginary parts of a a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO); // multiply a_re * b v1 = vec_madd(a_re, b.v, p2d_ZERO); // multiply a_im * b and get the conjugate result v2 = vec_madd(a_im, b.v, p2d_ZERO); v2 = reinterpret_cast(vec_sld(reinterpret_cast(v2), reinterpret_cast(v2), 8)); v2 = pxor(v2, reinterpret_cast(p2ul_CONJ_XOR1)); return Packet1cd(padd(v1, v2)); } EIGEN_STRONG_INLINE Packet1cd& operator*=(const Packet1cd& b) { v = pmul(Packet1cd(*this), b).v; return *this; } EIGEN_STRONG_INLINE Packet1cd operator*(const Packet1cd& b) const { return Packet1cd(*this) *= b; } EIGEN_STRONG_INLINE Packet1cd& operator+=(const Packet1cd& b) { v = padd(v, b.v); return *this; } EIGEN_STRONG_INLINE Packet1cd operator+(const Packet1cd& b) const { return Packet1cd(*this) += b; } EIGEN_STRONG_INLINE Packet1cd& operator-=(const Packet1cd& b) { v = psub(v, b.v); return *this; } EIGEN_STRONG_INLINE Packet1cd operator-(const Packet1cd& b) const { return Packet1cd(*this) -= b; } EIGEN_STRONG_INLINE Packet1cd operator-(void) const { return Packet1cd(vec_neg(v)); } Packet2d v; }; template<> struct packet_traits > : default_packet_traits { typedef Packet1cd type; typedef Packet1cd half; typedef Packet2d as_real; enum { Vectorizable = 1, AlignedOnScalar = 0, size = 1, HasHalfPacket = 0, HasAdd = 1, HasSub = 1, HasMul = 1, HasDiv = 1, HasNegate = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, HasMax = 0, HasSetLinear = 0 }; }; template<> struct unpacket_traits { typedef std::complex type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; typedef Packet2d as_real; }; template<> EIGEN_STRONG_INLINE Packet1cd pload (const std::complex* from) { return Packet1cd(pload((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from) { return Packet1cd(ploadu((const double*)from)); } template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { pstoreu((double*)to, from.v); } template<> EIGEN_STRONG_INLINE Packet1cd pset1(const std::complex& from) { /* here we really have to use unaligned loads :( */ return ploadu(&from); } template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather, Packet1cd>(const std::complex* from, Index) { return pload(from); } template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet1cd>(std::complex* to, const Packet1cd& from, Index) { pstore >(to, from); } template<> EIGEN_STRONG_INLINE Packet1cd padd(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); } template<> EIGEN_STRONG_INLINE Packet1cd psub(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); } template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast(p2ul_CONJ_XOR2))); } template<> EIGEN_STRONG_INLINE Packet1cd pand (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd por (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pxor (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pandnot(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* from) { return pset1(*from); } template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_PPC_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { EIGEN_ALIGN16 std::complex res[2]; pstore >(res, a); return res[0]; } template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } template<> EIGEN_STRONG_INLINE std::complex predux(const Packet1cd& a) { return pfirst(a); } template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& a) { return pfirst(a); } EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for AltiVec Packet1cd res = pmul(a,pconj(b)); Packet2d s = pmul(b.v, b.v); return Packet1cd(pdiv(res.v, padd(s, vec_perm(s, s, p16uc_REVERSE64)))); } EIGEN_STRONG_INLINE Packet1cd pcplxflip/**/(const Packet1cd& x) { return Packet1cd(preverse(Packet2d(x.v))); } EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); kernel.packet[0].v = tmp; } template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) { // Compare real and imaginary parts of a and b to get the mask vector: // [re(a)==re(b), im(a)==im(b)] Packet2d eq = reinterpret_cast(vec_cmpeq(a.v,b.v)); // Swap real/imag elements in the mask in to get: // [im(a)==im(b), re(a)==re(b)] Packet2d eq_swapped = reinterpret_cast(vec_sld(reinterpret_cast(eq), reinterpret_cast(eq), 8)); // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped return Packet1cd(vec_and(eq, eq_swapped)); } template<> EIGEN_STRONG_INLINE Packet1cd psqrt(const Packet1cd& a) { return psqrt_complex(a); } #endif // __VSX__ } // end namespace internal } // end namespace Eigen #endif // EIGEN_COMPLEX32_ALTIVEC_H