diff options
Diffstat (limited to 'Eigen/src/Core/arch/NEON')
-rw-r--r-- | Eigen/src/Core/arch/NEON/Complex.h | 92 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 141 |
2 files changed, 121 insertions, 112 deletions
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 9678040e7..d1943ba3b 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -22,11 +22,13 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see <http://www.gnu.org/licenses/>. -#ifndef EIGEN_COMPLEX_ALTIVEC_H -#define EIGEN_COMPLEX_ALTIVEC_H +#ifndef EIGEN_COMPLEX_NEON_H +#define EIGEN_COMPLEX_NEON_H -static uint32x4_t ei_p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; -static uint32x2_t ei_p2ui_CONJ_XOR = { 0x00000000, 0x80000000 }; +namespace internal { + +static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; +static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 }; //---------- float ---------- struct Packet2cf @@ -36,7 +38,7 @@ struct Packet2cf Packet4f v; }; -template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits +template<> struct packet_traits<std::complex<float> > : default_packet_traits { typedef Packet2cf type; enum { @@ -56,9 +58,9 @@ template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_tr }; }; -template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; }; +template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; }; -template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex<float>& from) +template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) { float32x2_t r64; r64 = vld1_f32((float *)&from); @@ -66,15 +68,15 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<Packet2cf>(const std::complex< return Packet2cf(vcombine_f32(r64, r64)); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(ei_padd<Packet4f>(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(ei_psub<Packet4f>(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a) { return Packet2cf(ei_pnegate<Packet4f>(a.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a) +template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { - return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), ei_p4ui_CONJ_XOR))); + return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), p4ui_CONJ_XOR))); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) +template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { Packet4f v1, v2; float32x2_t a_lo, a_hi; @@ -88,7 +90,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, // Multiply the imag a with b v2 = vmulq_f32(v2, b.v); // Conjugate v2 - v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), ei_p4ui_CONJ_XOR)); + v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR)); // Swap real/imag elements in v2. a_lo = vrev64_f32(vget_low_f32(v2)); a_hi = vrev64_f32(vget_high_f32(v2)); @@ -97,39 +99,39 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, return Packet2cf(vaddq_f32(v1, v2)); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) +template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) +template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) +template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) +template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload((const float*)from)); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } -template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore((float*)to, from.v); } -template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((float*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } -template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); } +template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); } -template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a) +template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) { std::complex<float> EIGEN_ALIGN16 x[2]; vst1q_f32((float *)x, a.v); return x[0]; } -template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a) +template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { float32x2_t a_lo, a_hi; Packet4f a_r128; @@ -141,12 +143,12 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a) return Packet2cf(a_r128); } -EIGEN_STRONG_INLINE Packet2cf ei_pcplxflip/*<Packet2cf>*/(const Packet2cf& x) +EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x) { return Packet2cf(vrev64q_f32(a.v)); } -template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a) +template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) { float32x2_t a1, a2; std::complex<float> s; @@ -159,7 +161,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Pa return s; } -template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs) +template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs) { Packet4f sum1, sum2, sum; @@ -171,7 +173,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* return Packet2cf(sum); } -template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a) +template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) { float32x2_t a1, a2, v1, v2, prod; std::complex<float> s; @@ -187,7 +189,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(cons // Multiply the imag a with b v2 = vmul_f32(v2, a2); // Conjugate v2 - v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), ei_p2ui_CONJ_XOR)); + v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR)); // Swap real/imag elements in v2. v2 = vrev64_f32(v2); // Add v1, v2 @@ -199,7 +201,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(cons } template<int Offset> -struct ei_palign_impl<Offset,Packet2cf> +struct palign_impl<Offset,Packet2cf> { EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) { @@ -210,43 +212,43 @@ struct ei_palign_impl<Offset,Packet2cf> } }; -template<> struct ei_conj_helper<Packet2cf, Packet2cf, false,true> +template<> struct conj_helper<Packet2cf, Packet2cf, false,true> { EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return ei_padd(pmul(x,y),c); } + { return padd(pmul(x,y),c); } EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const { - return ei_pmul(a, ei_pconj(b)); + return pmul(a, pconj(b)); } }; -template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,false> +template<> struct conj_helper<Packet2cf, Packet2cf, true,false> { EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return ei_padd(pmul(x,y),c); } + { return padd(pmul(x,y),c); } EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const { - return ei_pmul(ei_pconj(a), b); + return pmul(pconj(a), b); } }; -template<> struct ei_conj_helper<Packet2cf, Packet2cf, true,true> +template<> struct conj_helper<Packet2cf, Packet2cf, true,true> { EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return ei_padd(pmul(x,y),c); } + { return padd(pmul(x,y),c); } EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const { - return ei_pconj(ei_pmul(a, b)); + return pconj(pmul(a, b)); } }; -template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) +template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for AltiVec - Packet2cf res = ei_conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); + Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); Packet4f s, rev_s; float32x2_t a_lo, a_hi; @@ -256,7 +258,9 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, a_hi = vrev64_f32(vget_high_f32(s)); rev_s = vcombine_f32(a_lo, a_hi); - return Packet2cf(ei_pdiv(res.v, vaddq_f32(s,rev_s))); + return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); } -#endif // EIGEN_COMPLEX_ALTIVEC_H +} // end namespace internal + +#endif // EIGEN_COMPLEX_NEON_H diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 8220ed07c..cae35d737 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -27,6 +27,8 @@ #ifndef EIGEN_PACKET_MATH_NEON_H #define EIGEN_PACKET_MATH_NEON_H +namespace internal { + #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 #endif @@ -45,19 +47,19 @@ typedef float32x4_t Packet4f; typedef int32x4_t Packet4i; #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ - const Packet4f ei_p4f_##NAME = ei_pset1<Packet4f>(X) + const Packet4f p4f_##NAME = pset1<Packet4f>(X) #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ - const Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1<int>(X)) + const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X)) #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ - const Packet4i ei_p4i_##NAME = ei_pset1<Packet4i>(X) + const Packet4i p4i_##NAME = pset1<Packet4i>(X) #ifndef __pld #define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" ); #endif -template<> struct ei_packet_traits<float> : ei_default_packet_traits +template<> struct packet_traits<float> : default_packet_traits { typedef Packet4f type; enum { @@ -74,7 +76,7 @@ template<> struct ei_packet_traits<float> : ei_default_packet_traits HasSqrt = 0 }; }; -template<> struct ei_packet_traits<int> : ei_default_packet_traits +template<> struct packet_traits<int> : default_packet_traits { typedef Packet4i type; enum { @@ -85,36 +87,36 @@ template<> struct ei_packet_traits<int> : ei_default_packet_traits }; }; -template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; }; -template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; }; +template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; }; +template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; }; -template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) +template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { Packet4f countdown = { 3, 2, 1, 0 }; - return vaddq_f32(ei_pset1<Packet4f>(a), countdown); + return vaddq_f32(pset1<Packet4f>(a), countdown); } -template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) +template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { Packet4i countdown = { 3, 2, 1, 0 }; - return vaddq_s32(ei_pset1<Packet4i>(a), countdown); + return vaddq_s32(pset1<Packet4i>(a), countdown); } -template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a) { return vnegq_f32(a); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a) { return vnegq_s32(a); } +template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); } +template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) +template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { Packet4f inv, restep, div; @@ -135,59 +137,59 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, con return div; } -template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) -{ ei_assert(false && "packet integer division are not supported by NEON"); - return ei_pset1<Packet4i>(0); +template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) +{ eigen_assert(false && "packet integer division are not supported by NEON"); + return pset1<Packet4i>(0); } // for some weird raisons, it has to be overloaded for packet of integers -template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); } +template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); } // Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics -template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b) +template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b) +template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); } -template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b) +template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) +template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ei_ploaddup<Packet4f>(const float* from) +template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) { float32x2_t lo, ho; lo = vdup_n_f32(*from); hi = vdup_n_f32(*from); return vcombine_f32(lo, hi); } -template<> EIGEN_STRONG_INLINE Packet4i ei_ploaddup<Packet4i>(const float* from) +template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const float* from) { int32x2_t lo, ho; lo = vdup_n_s32(*from); @@ -195,20 +197,20 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploaddup<Packet4i>(const float* from) return vcombine_s32(lo, hi); } -template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } -template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } +template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } -template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_STRONG_INLINE void ei_prefetch<float>(const float* addr) { __pld(addr); } -template<> EIGEN_STRONG_INLINE void ei_prefetch<int>(const int* addr) { __pld(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { __pld(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { __pld(addr); } // FIXME only store the 2 first elements ? -template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } -template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } -template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) { +template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { float32x2_t a_lo, a_hi; Packet4f a_r64; @@ -217,7 +219,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) { a_hi = vget_high_f32(a_r64); return vcombine_f32(a_hi, a_lo); } -template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) { +template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { int32x2_t a_lo, a_hi; Packet4i a_r64; @@ -226,10 +228,10 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) { a_hi = vget_high_s32(a_r64); return vcombine_s32(a_hi, a_lo); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vabsq_f32(a); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vabsq_s32(a); } +template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); } +template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); } -template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a) +template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) { float32x2_t a_lo, a_hi, sum; float s[2]; @@ -243,7 +245,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a) return s[0]; } -template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs) +template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs) { float32x4x2_t vtrn1, vtrn2, res1, res2; Packet4f sum1, sum2, sum; @@ -263,7 +265,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vec return sum; } -template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, sum; int32_t s[2]; @@ -277,7 +279,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a) return s[0]; } -template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs) +template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs) { int32x4x2_t vtrn1, vtrn2, res1, res2; Packet4i sum1, sum2, sum; @@ -299,7 +301,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vec // Other reduction functions: // mul -template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a) +template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) { float32x2_t a_lo, a_hi, prod; float s[2]; @@ -315,7 +317,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a) return s[0]; } -template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, prod; int32_t s[2]; @@ -333,7 +335,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a) } // min -template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a) +template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) { float32x2_t a_lo, a_hi, min; float s[2]; @@ -346,7 +348,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a) return s[0]; } -template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, min; int32_t s[2]; @@ -361,7 +363,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a) } // max -template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a) +template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) { float32x2_t a_lo, a_hi, max; float s[2]; @@ -374,7 +376,7 @@ template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a) return s[0]; } -template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a) +template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) { int32x2_t a_lo, a_hi, max; int32_t s[2]; @@ -389,7 +391,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a) } template<int Offset> -struct ei_palign_impl<Offset,Packet4f> +struct palign_impl<Offset,Packet4f> { EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second) { @@ -399,7 +401,7 @@ struct ei_palign_impl<Offset,Packet4f> }; template<int Offset> -struct ei_palign_impl<Offset,Packet4i> +struct palign_impl<Offset,Packet4i> { EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second) { @@ -407,4 +409,7 @@ struct ei_palign_impl<Offset,Packet4i> first = vextq_s32(first, second, Offset); } }; + +} // end namespace internal + #endif // EIGEN_PACKET_MATH_NEON_H |