about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2009-03-09 18:40:09 +0000
committer: Gael Guennebaud <g.gael@free.fr> 2009-03-09 18:40:09 +0000
commit: 3f80c68be52543d1463eec4461c3b79f49f33ed9 (patch)
tree: e79a9bd83fc0d95cf4428598cd05c4052ad94302 /Eigen
parent: bd8107c90c795322eb863d2f0bf2a3d49e3bc019 (diff)
add the vectorization of abs
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/src/Core/Functors.h | 5
-rw-r--r-- Eigen/src/Core/GenericPacketMath.h | 4
-rw-r--r-- Eigen/src/Core/arch/AltiVec/PacketMath.h | 3
-rw-r--r-- Eigen/src/Core/arch/SSE/PacketMath.h | 24
4 files changed, 34 insertions, 2 deletions
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index 41e94e28d..6a8cf980d 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -203,13 +203,16 @@ struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
template<typename Scalar> struct ei_scalar_abs_op EIGEN_EMPTY_STRUCT {
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return ei_abs(a); }
+ template<typename PacketScalar>
+ EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
+ { return ei_pabs(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::AddCost,
- PacketAccess = false // FIXME this could actually be vectorized with SSSE3.
+ PacketAccess = int(ei_packet_traits<Scalar>::size)>1
};
};
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 10d39b68b..6be33f6e5 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -64,6 +64,10 @@ template<typename Packet> inline Packet
ei_pmax(const Packet& a,
const Packet& b) { return std::max(a, b); }
+/** \internal \returns the absolute value of \a a */
+template<typename Packet> inline Packet
+ei_pabs(const Packet& a) { return ei_abs(a); }
+
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_pload(const Scalar* from) { return *from; }
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 2f7aaf6b6..5c6bce37e 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -163,6 +163,9 @@ template<> inline v4i ei_pmin(const v4i& a, const v4i& b) { return vec_min(
template<> inline v4f ei_pmax(const v4f& a, const v4f& b) { return vec_max(a,b); }
template<> inline v4i ei_pmax(const v4i& a, const v4i& b) { return vec_max(a,b); }
+template<> EIGEN_STRONG_INLINE v4f ei_pabs(const v4f& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE v4i ei_pabs(const v4i& a) { return vec_abs(a); }
+
template<> inline v4f ei_pload(const float* from) { return vec_ld(0, from); }
template<> inline v4i ei_pload(const int* from) { return vec_ld(0, from); }
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index c3ab0588f..cd59549e5 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -34,7 +34,7 @@
#define ei_vec4i_swizzle1(v,p,q,r,s) \
(_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
-
+
#define ei_vec4f_swizzle2(a,b,p,q,r,s) \
(_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
@@ -146,6 +146,28 @@ template<> EIGEN_STRONG_INLINE __m128d ei_preverse(const __m128d& a)
template<> EIGEN_STRONG_INLINE __m128i ei_preverse(const __m128i& a)
{ return _mm_shuffle_epi32(a,0x1B); }
+
+template<> EIGEN_STRONG_INLINE __m128 ei_pabs(const __m128& a)
+{
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+ return _mm_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE __m128d ei_pabs(const __m128d& a)
+{
+ const __m128d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+ return _mm_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE __m128i ei_pabs(const __m128i& a)
+{
+ #ifdef __SSSE3__
+ return _mm_abs_epi32(a);
+ #else
+ __m128i aux = _mm_srai_epi32(a,31);
+ return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
+ #endif
+}
+
+
#ifdef __SSE3__
// TODO implement SSE2 versions as well as integer versions
template<> EIGEN_STRONG_INLINE __m128 ei_preduxp<__m128>(const __m128* vecs)