// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2016 Konstantinos Margaritis // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_PACKET_MATH_ZVECTOR_H #define EIGEN_PACKET_MATH_ZVECTOR_H #include namespace Eigen { namespace internal { #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 #endif #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD #define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD #endif // NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 #endif typedef __vector int Packet4i; typedef __vector unsigned int Packet4ui; typedef __vector __bool int Packet4bi; typedef __vector short int Packet8i; typedef __vector unsigned char Packet16uc; typedef __vector double Packet2d; typedef __vector unsigned long long Packet2ul; typedef __vector long long Packet2l; typedef union { int32_t i[4]; uint32_t ui[4]; int64_t l[2]; uint64_t ul[2]; double d[2]; Packet4i v4i; Packet4ui v4ui; Packet2l v2l; Packet2ul v2ul; Packet2d v2d; } Packet; // We don't want to write the same code all the time, but we need to reuse the constants // and it doesn't really work to declare them global, so we define macros instead #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ Packet4i p4i_##NAME = reinterpret_cast(vec_splat_s32(X)) #define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \ Packet2d p2d_##NAME = reinterpret_cast(vec_splat_s64(X)) #define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \ Packet2l p2l_##NAME = reinterpret_cast(vec_splat_s64(X)) #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ Packet4i p4i_##NAME = pset1(X) #define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ Packet2d p2d_##NAME = pset1(X) #define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \ Packet2l p2l_##NAME = pset1(X) // These constants are endian-agnostic //static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1} static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0); static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0); static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1); static Packet2d p2d_ONE = { 1.0, 1.0 }; static Packet2d p2d_ZERO_ = { -0.0, -0.0 }; static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; static Packet2d p2d_COUNTDOWN = reinterpret_cast(vec_sld(reinterpret_cast(p2d_ZERO), reinterpret_cast(p2d_ONE), 8)); static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; // Mask alignment #define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0 #define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) // Handle endianness properly while loading constants // Define global static constants: static Packet16uc p16uc_FORWARD = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 }; static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 }; static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; /*static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };*/ static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; /*static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};*/ static Packet16uc p16uc_TRANSPOSE64_HI = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; static Packet16uc p16uc_TRANSPOSE64_LO = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; //static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; //static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC #define EIGEN_ZVECTOR_PREFETCH(ADDR) __builtin_prefetch(ADDR); #else #define EIGEN_ZVECTOR_PREFETCH(ADDR) asm( " pfd [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); #endif template<> struct packet_traits : default_packet_traits { typedef Packet4i type; typedef Packet4i half; enum { // FIXME check the Has* Vectorizable = 1, AlignedOnScalar = 1, size = 4, HasHalfPacket = 0, // FIXME check the Has* HasAdd = 1, HasSub = 1, HasMul = 1, HasDiv = 1, HasBlend = 1 }; }; template<> struct packet_traits : default_packet_traits { typedef Packet2d type; typedef Packet2d half; enum { Vectorizable = 1, AlignedOnScalar = 1, size=2, HasHalfPacket = 1, // FIXME check the Has* HasAdd = 1, HasSub = 1, HasMul = 1, HasDiv = 1, HasMin = 1, HasMax = 1, HasAbs = 1, HasSin = 0, HasCos = 0, HasLog = 0, HasExp = 1, HasSqrt = 1, HasRsqrt = 1, HasRound = 1, HasFloor = 1, HasCeil = 1, HasNegate = 1, HasBlend = 1 }; }; template<> struct unpacket_traits { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; template<> struct unpacket_traits { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; inline std::ostream & operator <<(std::ostream & s, const Packet4i & v) { Packet vt; vt.v4i = v; s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3]; return s; } inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v) { Packet vt; vt.v4ui = v; s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3]; return s; } inline std::ostream & operator <<(std::ostream & s, const Packet2l & v) { Packet vt; vt.v2l = v; s << vt.l[0] << ", " << vt.l[1]; return s; } inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v) { Packet vt; vt.v2ul = v; s << vt.ul[0] << ", " << vt.ul[1] ; return s; } inline std::ostream & operator <<(std::ostream & s, const Packet2d & v) { Packet vt; vt.v2d = v; s << vt.d[0] << ", " << vt.d[1]; return s; } template struct palign_impl { static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second) { switch (Offset % 4) { case 1: first = vec_sld(first, second, 4); break; case 2: first = vec_sld(first, second, 8); break; case 3: first = vec_sld(first, second, 12); break; } } }; template struct palign_impl { static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second) { if (Offset == 1) first = reinterpret_cast(vec_sld(reinterpret_cast(first), reinterpret_cast(second), 8)); } }; template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { // FIXME: No intrinsic yet EIGEN_DEBUG_ALIGNED_LOAD Packet *vfrom; vfrom = (Packet *) from; return vfrom->v4i; } template<> EIGEN_STRONG_INLINE Packet2d pload(const double* from) { // FIXME: No intrinsic yet EIGEN_DEBUG_ALIGNED_LOAD Packet *vfrom; vfrom = (Packet *) from; return vfrom->v2d; } template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { // FIXME: No intrinsic yet EIGEN_DEBUG_ALIGNED_STORE Packet *vto; vto = (Packet *) to; vto->v4i = from; } template<> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { // FIXME: No intrinsic yet EIGEN_DEBUG_ALIGNED_STORE Packet *vto; vto = (Packet *) to; vto->v2d = from; } template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return vec_splats(from); } template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return vec_splats(from); } template<> EIGEN_STRONG_INLINE void pbroadcast4(const int *a, Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3) { a3 = pload(a); a0 = vec_splat(a3, 0); a1 = vec_splat(a3, 1); a2 = vec_splat(a3, 2); a3 = vec_splat(a3, 3); } template<> EIGEN_STRONG_INLINE void pbroadcast4(const double *a, Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3) { a1 = pload(a); a0 = vec_splat(a1, 0); a1 = vec_splat(a1, 1); a3 = pload(a+2); a2 = vec_splat(a3, 0); a3 = vec_splat(a3, 1); } template<> EIGEN_DEVICE_FUNC inline Packet4i pgather(const int* from, Index stride) { int EIGEN_ALIGN16 ai[4]; ai[0] = from[0*stride]; ai[1] = from[1*stride]; ai[2] = from[2*stride]; ai[3] = from[3*stride]; return pload(ai); } template<> EIGEN_DEVICE_FUNC inline Packet2d pgather(const double* from, Index stride) { double EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return pload(af); } template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const Packet4i& from, Index stride) { int EIGEN_ALIGN16 ai[4]; pstore((int *)ai, from); to[0*stride] = ai[0]; to[1*stride] = ai[1]; to[2*stride] = ai[2]; to[3*stride] = ai[3]; } template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const Packet2d& from, Index stride) { double EIGEN_ALIGN16 af[2]; pstore(af, from); to[0*stride] = af[0]; to[1*stride] = af[1]; } template<> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const Packet4i& b) { return (a + b); } template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return (a + b); } template<> EIGEN_STRONG_INLINE Packet4i psub(const Packet4i& a, const Packet4i& b) { return (a - b); } template<> EIGEN_STRONG_INLINE Packet2d psub(const Packet2d& a, const Packet2d& b) { return (a - b); } template<> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const Packet4i& b) { return (a * b); } template<> EIGEN_STRONG_INLINE Packet2d pmul(const Packet2d& a, const Packet2d& b) { return (a * b); } template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& a, const Packet4i& b) { return (a / b); } template<> EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const Packet2d& b) { return (a / b); } template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); } template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); } template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a, b), c); } template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); } template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return padd(pset1(a), p4i_COUNTDOWN); } template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return padd(pset1(a), p2d_COUNTDOWN); } template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet2d pand(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet4i por(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); } template<> EIGEN_STRONG_INLINE Packet2d por(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pxor(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); } template<> EIGEN_STRONG_INLINE Packet2d pxor(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return pand(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet2d pandnot(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet2d pround(const Packet2d& a) { return vec_round(a); } template<> EIGEN_STRONG_INLINE Packet2d pceil(const Packet2d& a) { return vec_ceil(a); } template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { return vec_floor(a); } template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { return pload(from); } template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) { return pload(from); } template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) { Packet4i p = pload(from); return vec_perm(p, p, p16uc_DUPLICATE32_HI); } template<> EIGEN_STRONG_INLINE Packet2d ploaddup(const double* from) { Packet2d p = pload(from); return vec_perm(p, p, p16uc_PSET64_HI); } template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { pstore(to, from); } template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& from) { pstore(to, from); } template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); } template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; } template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return reinterpret_cast(vec_perm(reinterpret_cast(a), reinterpret_cast(a), p16uc_REVERSE32)); } template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return reinterpret_cast(vec_perm(reinterpret_cast(a), reinterpret_cast(a), p16uc_REVERSE64)); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) { Packet4i b, sum; b = vec_sld(a, a, 8); sum = padd(a, b); b = vec_sld(sum, sum, 4); sum = padd(sum, b); return pfirst(sum); } template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { Packet2d b, sum; b = reinterpret_cast(vec_sld(reinterpret_cast(a), reinterpret_cast(a), 8)); sum = padd(a, b); return pfirst(sum); } template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) { Packet4i v[4], sum[4]; // It's easier and faster to transpose then add as columns // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation // Do the transpose, first set of moves v[0] = vec_mergeh(vecs[0], vecs[2]); v[1] = vec_mergel(vecs[0], vecs[2]); v[2] = vec_mergeh(vecs[1], vecs[3]); v[3] = vec_mergel(vecs[1], vecs[3]); // Get the resulting vectors sum[0] = vec_mergeh(v[0], v[2]); sum[1] = vec_mergel(v[0], v[2]); sum[2] = vec_mergeh(v[1], v[3]); sum[3] = vec_mergel(v[1], v[3]); // Now do the summation: // Lines 0+1 sum[0] = padd(sum[0], sum[1]); // Lines 2+3 sum[1] = padd(sum[2], sum[3]); // Add the results sum[0] = padd(sum[0], sum[1]); return sum[0]; } template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) { Packet2d v[2], sum; v[0] = padd(vecs[0], reinterpret_cast(vec_sld(reinterpret_cast(vecs[0]), reinterpret_cast(vecs[0]), 8))); v[1] = padd(vecs[1], reinterpret_cast(vec_sld(reinterpret_cast(vecs[1]), reinterpret_cast(vecs[1]), 8))); sum = reinterpret_cast(vec_sld(reinterpret_cast(v[0]), reinterpret_cast(v[1]), 8)); return sum; } // Other reduction functions: // mul template<> EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) { EIGEN_ALIGN16 int aux[4]; pstore(aux, a); return aux[0] * aux[1] * aux[2] * aux[3]; } template<> EIGEN_STRONG_INLINE double predux_mul(const Packet2d& a) { return pfirst(pmul(a, reinterpret_cast(vec_sld(reinterpret_cast(a), reinterpret_cast(a), 8)))); } // min template<> EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) { Packet4i b, res; b = pmin(a, vec_sld(a, a, 8)); res = pmin(b, vec_sld(b, b, 4)); return pfirst(res); } template<> EIGEN_STRONG_INLINE double predux_min(const Packet2d& a) { return pfirst(pmin(a, reinterpret_cast(vec_sld(reinterpret_cast(a), reinterpret_cast(a), 8)))); } // max template<> EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) { Packet4i b, res; b = pmax(a, vec_sld(a, a, 8)); res = pmax(b, vec_sld(b, b, 4)); return pfirst(res); } // max template<> EIGEN_STRONG_INLINE double predux_max(const Packet2d& a) { return pfirst(pmax(a, reinterpret_cast(vec_sld(reinterpret_cast(a), reinterpret_cast(a), 8)))); } EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]); Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]); Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]); Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]); kernel.packet[0] = vec_mergeh(t0, t2); kernel.packet[1] = vec_mergel(t0, t2); kernel.packet[2] = vec_mergeh(t1, t3); kernel.packet[3] = vec_mergel(t1, t3); } EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI); Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO); kernel.packet[0] = t0; kernel.packet[1] = t1; } template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) { Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] }; Packet4ui mask = vec_cmpeq(select, reinterpret_cast(p4i_ONE)); return vec_sel(elsePacket, thenPacket, mask); } template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { Packet2ul select = { ifPacket.select[0], ifPacket.select[1] }; Packet2ul mask = vec_cmpeq(select, reinterpret_cast(p2l_ONE)); return vec_sel(elsePacket, thenPacket, mask); } } // end namespace internal } // end namespace Eigen #endif // EIGEN_PACKET_MATH_ZVECTOR_H