diff options
Diffstat (limited to 'Eigen/src/Core/arch/AltiVec/PacketMath.h')
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/PacketMath.h | 189 |
1 files changed, 97 insertions, 92 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 8205beae5..88a8e3669 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -25,6 +25,8 @@ #ifndef EIGEN_PACKET_MATH_ALTIVEC_H #define EIGEN_PACKET_MATH_ALTIVEC_H +namespace internal { + #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 #endif @@ -53,38 +55,38 @@ typedef __vector unsigned char Packet16uc; // and it doesn't really work to declare them global, so we define macros instead #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ - Packet4f ei_p4f_##NAME = (Packet4f) vec_splat_s32(X) + Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X) #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ - Packet4i ei_p4i_##NAME = vec_splat_s32(X) + Packet4i p4i_##NAME = vec_splat_s32(X) #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ - Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X) + Packet4f p4f_##NAME = pset1<Packet4f>(X) #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ - Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X)) + Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X)) #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ - Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X) + Packet4i p4i_##NAME = pset1<Packet4i>(X) #define DST_CHAN 1 #define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride)) // Define global static constants: -static Packet4f ei_p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 }; -static Packet4i ei_p4i_COUNTDOWN = { 3, 2, 1, 0 }; -static Packet16uc ei_p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; -static Packet16uc ei_p16uc_FORWARD = vec_lvsl(0, (float*)0); +static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 }; +static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 }; +static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; +static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); -static Packet4f ei_p4f_ONE = vec_ctf(ei_p4i_ONE, 0); -static Packet4f ei_p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)ei_p4i_MINUS1, (Packet4ui)ei_p4i_MINUS1); +static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); +static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); -template<> struct ei_packet_traits<float> : ei_default_packet_traits +template<> struct packet_traits<float> : default_packet_traits { typedef Packet4f type; enum { @@ -100,7 +102,7 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits HasSqrt = 0 }; }; -template<> struct ei_packet_traits<int> : ei_default_packet_traits +template<> struct packet_traits<int> : default_packet_traits { typedef Packet4i type; enum { @@ -111,8 +113,8 @@ template<> struct ei_packet_traits<int> : ei_default_packet_traits }; }; -template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; }; -template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; }; +template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; }; +template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; }; /* inline std::ostream & operator <<(std::ostream & s, const Packet4f & v) { @@ -158,7 +160,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v) return s; } */ -template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) { +template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html float EIGEN_ALIGN16 af[4]; af[0] = from; @@ -167,7 +169,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) { return vc; } -template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) { +template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { int EIGEN_ALIGN16 ai[4]; ai[0] = from; Packet4i vc = vec_ld(0, ai); @@ -175,22 +177,22 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) return vc; } -template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return vec_add(ei_pset1<Packet4f>(a), ei_p4f_COUNTDOWN); } -template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return vec_add(ei_pset1<Packet4i>(a), ei_p4i_COUNTDOWN); } +template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); } +template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); } -template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a) { return ei_psub<Packet4f>(ei_p4f_ZERO, a); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a) { return ei_psub<Packet4i>(ei_p4i_ZERO, a); } +template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub<Packet4f>(p4f_ZERO, a); } +template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub<Packet4i>(p4i_ZERO, a); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,ei_p4f_ZERO); } +template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,p4f_ZERO); } /* Commented out: it's actually slower than processing it scalar * -template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) +template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { // Detailed in: http://freevec.org/content/32bit_signed_integer_multiplication_altivec //Set up constants, variables @@ -201,21 +203,21 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con b1 = vec_abs(b); // Get the signs using xor - Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), ei_p4i_ZERO); + Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), p4i_ZERO); // Do the multiplication for the asbolute values. - bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) ei_p4i_MINUS16 ); + bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) p4i_MINUS16 ); low_prod = vec_mulo((Packet8i) a1, (Packet8i)b1); - high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, ei_p4i_ZERO); - high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) ei_p4i_MINUS16); + high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, p4i_ZERO); + high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) p4i_MINUS16); prod = vec_add( low_prod, high_prod ); // NOR the product and select only the negative elements according to the sign mask prod_ = vec_nor(prod, prod); - prod_ = vec_sel(ei_p4i_ZERO, prod_, sgn); + prod_ = vec_sel(p4i_ZERO, prod_, sgn); // Add 1 to the result to get the negative numbers - v1sel = vec_sel(ei_p4i_ZERO, ei_p4i_ONE, sgn); + v1sel = vec_sel(p4i_ZERO, p4i_ONE, sgn); prod_ = vec_add(prod_, v1sel); // Merge the results back to the final vector. @@ -224,7 +226,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con return prod; } */ -template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) +template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { Packet4f t, y_0, y_1, res; @@ -232,45 +234,45 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con y_0 = vec_re(b); // Do one Newton-Raphson iteration to get the needed accuracy - t = vec_nmsub(y_0, b, ei_p4f_ONE); + t = vec_nmsub(y_0, b, p4f_ONE); y_1 = vec_madd(y_0, t, y_0); - res = vec_madd(a, y_1, ei_p4f_ZERO); + res = vec_madd(a, y_1, p4f_ZERO); return res; } -template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) -{ ei_assert(false && "packet integer division are not supported by AltiVec"); - return ei_pset1<Packet4i>(0); +template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) +{ eigen_assert(false && "packet integer division are not supported by AltiVec"); + return pset1<Packet4i>(0); } // for some weird raisons, it has to be overloaded for packet of integers -template<> EIGEN_STRONG_INLINE Packet4f ei_pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); } +template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); } +template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } // Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics -template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } +template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); } +template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); } +template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } +template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); } +template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } +template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } +template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } -template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from) +template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html @@ -282,7 +284,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu<Packet4f>(const float* from) return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data } -template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from) +template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html @@ -294,10 +296,10 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<Packet4i>(const int* from) return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data } -template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } -template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } +template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } +template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } -template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) +template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html @@ -315,7 +317,7 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part } -template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) +template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html @@ -334,29 +336,29 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part } -template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } -template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } +template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } +template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } -template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } -template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } +template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } +template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } -template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, ei_p16uc_REVERSE); } -template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, ei_p16uc_REVERSE); } +template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); } +template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vec_abs(a); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vec_abs(a); } +template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); } +template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } -template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a) +template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) { Packet4f b, sum; b = (Packet4f) vec_sld(a, a, 8); sum = vec_add(a, b); b = (Packet4f) vec_sld(sum, sum, 4); sum = vec_add(sum, b); - return ei_pfirst(sum); + return pfirst(sum); } -template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs) +template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs) { Packet4f v[4], sum[4]; @@ -384,15 +386,15 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vec return sum[0]; } -template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) { Packet4i sum; - sum = vec_sums(a, ei_p4i_ZERO); - sum = vec_sld(sum, ei_p4i_ZERO, 12); - return ei_pfirst(sum); + sum = vec_sums(a, p4i_ZERO); + sum = vec_sld(sum, p4i_ZERO, 12); + return pfirst(sum); } -template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs) +template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs) { Packet4i v[4], sum[4]; @@ -422,56 +424,56 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vec // Other reduction functions: // mul -template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a) +template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) { Packet4f prod; - prod = ei_pmul(a, (Packet4f)vec_sld(a, a, 8)); - return ei_pfirst(ei_pmul(prod, (Packet4f)vec_sld(prod, prod, 4))); + prod = pmul(a, (Packet4f)vec_sld(a, a, 8)); + return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4))); } -template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) { EIGEN_ALIGN16 int aux[4]; - ei_pstore(aux, a); + pstore(aux, a); return aux[0] * aux[1] * aux[2] * aux[3]; } // min -template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a) +template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) { Packet4f b, res; b = vec_min(a, vec_sld(a, a, 8)); res = vec_min(b, vec_sld(b, b, 4)); - return ei_pfirst(res); + return pfirst(res); } -template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a) { Packet4i b, res; b = vec_min(a, vec_sld(a, a, 8)); res = vec_min(b, vec_sld(b, b, 4)); - return ei_pfirst(res); + return pfirst(res); } // max -template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a) +template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) { Packet4f b, res; b = vec_max(a, vec_sld(a, a, 8)); res = vec_max(b, vec_sld(b, b, 4)); - return ei_pfirst(res); + return pfirst(res); } -template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) { Packet4i b, res; b = vec_max(a, vec_sld(a, a, 8)); res = vec_max(b, vec_sld(b, b, 4)); - return ei_pfirst(res); + return pfirst(res); } template<int Offset> -struct ei_palign_impl<Offset,Packet4f> +struct palign_impl<Offset,Packet4f> { EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second) { @@ -481,7 +483,7 @@ struct ei_palign_impl<Offset,Packet4f> }; template<int Offset> -struct ei_palign_impl<Offset,Packet4i> +struct palign_impl<Offset,Packet4i> { EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second) { @@ -489,4 +491,7 @@ struct ei_palign_impl<Offset,Packet4i> first = vec_sld(first, second, Offset*4); } }; + +} // end namespace internal + #endif // EIGEN_PACKET_MATH_ALTIVEC_H |