diff options
-rw-r--r-- | Eigen/src/Core/Functors.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/PacketMath.h | 3 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/PacketMath.h | 24 | ||||
-rw-r--r-- | test/packetmath.cpp | 28 |
5 files changed, 52 insertions, 12 deletions
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index 41e94e28d..6a8cf980d 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -203,13 +203,16 @@ struct ei_functor_traits<ei_scalar_opposite_op<Scalar> > template<typename Scalar> struct ei_scalar_abs_op EIGEN_EMPTY_STRUCT { typedef typename NumTraits<Scalar>::Real result_type; EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return ei_abs(a); } + template<typename PacketScalar> + EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const + { return ei_pabs(a); } }; template<typename Scalar> struct ei_functor_traits<ei_scalar_abs_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, - PacketAccess = false // FIXME this could actually be vectorized with SSSE3. + PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; }; diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 10d39b68b..6be33f6e5 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -64,6 +64,10 @@ template<typename Packet> inline Packet ei_pmax(const Packet& a, const Packet& b) { return std::max(a, b); } +/** \internal \returns the absolute value of \a a */ +template<typename Packet> inline Packet +ei_pabs(const Packet& a) { return ei_abs(a); } + /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ template<typename Scalar> inline typename ei_packet_traits<Scalar>::type ei_pload(const Scalar* from) { return *from; } diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 2f7aaf6b6..5c6bce37e 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -163,6 +163,9 @@ template<> inline v4i ei_pmin(const v4i& a, const v4i& b) { return vec_min( template<> inline v4f ei_pmax(const v4f& a, const v4f& b) { return vec_max(a,b); } template<> inline v4i ei_pmax(const v4i& a, const v4i& b) { return vec_max(a,b); } +template<> EIGEN_STRONG_INLINE v4f ei_pabs(const v4f& a) { return vec_abs(a); } +template<> EIGEN_STRONG_INLINE v4i ei_pabs(const v4i& a) { return vec_abs(a); } + template<> inline v4f ei_pload(const float* from) { return vec_ld(0, from); } template<> inline v4i ei_pload(const int* from) { return vec_ld(0, from); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index c3ab0588f..cd59549e5 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -34,7 +34,7 @@ #define ei_vec4i_swizzle1(v,p,q,r,s) \ (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p)))) - + #define ei_vec4f_swizzle2(a,b,p,q,r,s) \ (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p)))) @@ -146,6 +146,28 @@ template<> EIGEN_STRONG_INLINE __m128d ei_preverse(const __m128d& a) template<> EIGEN_STRONG_INLINE __m128i ei_preverse(const __m128i& a) { return _mm_shuffle_epi32(a,0x1B); } + +template<> EIGEN_STRONG_INLINE __m128 ei_pabs(const __m128& a) +{ + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF)); + return _mm_and_ps(a,mask); +} +template<> EIGEN_STRONG_INLINE __m128d ei_pabs(const __m128d& a) +{ + const __m128d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF)); + return _mm_and_pd(a,mask); +} +template<> EIGEN_STRONG_INLINE __m128i ei_pabs(const __m128i& a) +{ + #ifdef __SSSE3__ + return _mm_abs_epi32(a); + #else + __m128i aux = _mm_srai_epi32(a,31); + return _mm_sub_epi32(_mm_xor_si128(a,aux),aux); + #endif +} + + #ifdef __SSE3__ // TODO implement SSE2 versions as well as integer versions template<> EIGEN_STRONG_INLINE __m128 ei_preduxp<__m128>(const __m128* vecs) diff --git a/test/packetmath.cpp b/test/packetmath.cpp index c7694b660..892777538 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -33,13 +33,20 @@ template<typename Scalar> bool areApprox(const Scalar* a, const Scalar* b, int s return true; } -#define CHECK_CWISE(REFOP, POP) { \ +#define CHECK_CWISE2(REFOP, POP) { \ for (int i=0; i<PacketSize; ++i) \ ref[i] = REFOP(data1[i], data1[i+PacketSize]); \ ei_pstore(data2, POP(ei_pload(data1), ei_pload(data1+PacketSize))); \ VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ } +#define CHECK_CWISE1(REFOP, POP) { \ + for (int i=0; i<PacketSize; ++i) \ + ref[i] = REFOP(data1[i]); \ + ei_pstore(data2, POP(ei_pload(data1))); \ + VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ +} + #define REF_ADD(a,b) ((a)+(b)) #define REF_SUB(a,b) ((a)-(b)) #define REF_MUL(a,b) ((a)*(b)) @@ -103,15 +110,16 @@ template<typename Scalar> void packetmath() VERIFY(areApprox(ref, data2, PacketSize) && "ei_palign"); } - CHECK_CWISE(REF_ADD, ei_padd); - CHECK_CWISE(REF_SUB, ei_psub); - CHECK_CWISE(REF_MUL, ei_pmul); + CHECK_CWISE2(REF_ADD, ei_padd); + CHECK_CWISE2(REF_SUB, ei_psub); + CHECK_CWISE2(REF_MUL, ei_pmul); #ifndef EIGEN_VECTORIZE_ALTIVEC if (!ei_is_same_type<Scalar,int>::ret) - CHECK_CWISE(REF_DIV, ei_pdiv); + CHECK_CWISE2(REF_DIV, ei_pdiv); #endif - CHECK_CWISE(std::min, ei_pmin); - CHECK_CWISE(std::max, ei_pmax); + CHECK_CWISE2(std::min, ei_pmin); + CHECK_CWISE2(std::max, ei_pmax); + CHECK_CWISE1(ei_abs, ei_pabs); for (int i=0; i<PacketSize; ++i) ref[i] = data1[0]; @@ -124,17 +132,17 @@ template<typename Scalar> void packetmath() for (int i=0; i<PacketSize; ++i) ref[0] += data1[i]; VERIFY(ei_isApprox(ref[0], ei_predux(ei_pload(data1))) && "ei_predux"); - + ref[0] = 1; for (int i=0; i<PacketSize; ++i) ref[0] *= data1[i]; VERIFY(ei_isApprox(ref[0], ei_predux_mul(ei_pload(data1))) && "ei_predux_mul"); - + ref[0] = data1[0]; for (int i=0; i<PacketSize; ++i) ref[0] = std::min(ref[0],data1[i]); VERIFY(ei_isApprox(ref[0], ei_predux_min(ei_pload(data1))) && "ei_predux_min"); - + ref[0] = data1[0]; for (int i=0; i<PacketSize; ++i) ref[0] = std::max(ref[0],data1[i]); |