aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE
diff options
context:
space:
mode:
author: Rasmus Munk Larsen <rmlarsen@google.com> 2019-01-09 16:34:23 -0800
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2019-01-09 16:34:23 -0800
commit: 8f044425263e876236030f62461507325edfdf44 (patch)
tree: 42cf697f4272f132dd4e20160663e1d9686ddfb6 /Eigen/src/Core/arch/SSE
parent: cb3c059fa4449f7ea1344ea8c677d8b427f6a273 (diff)
Collapsed revision
* Collapsed revision * Add packet op "pones". Write pnot(a) as pxor(pones(a), a). * Collapsed revision * Simplify a bit. * Undo useless diffs. * Fix typo.
Diffstat (limited to 'Eigen/src/Core/arch/SSE')
-rw-r--r-- Eigen/src/Core/arch/SSE/Complex.h 11
-rwxr-xr-x Eigen/src/Core/arch/SSE/PacketMath.h 11
2 files changed, 18 insertions, 4 deletions
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index a7304193b..fa84097ac 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -82,6 +82,9 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
#endif
}
+template<> EIGEN_STRONG_INLINE Packet2cf pones <Packet2cf>(const Packet2cf& a) { return Packet2cf(pones(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnot <Packet2cf>(const Packet2cf& a) { return Packet2cf(pnot(a.v)); }
+
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
@@ -305,6 +308,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
#endif
}
+template<> EIGEN_STRONG_INLINE Packet1cd pones <Packet1cd>(const Packet1cd& a) { return Packet1cd(pones(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnot <Packet1cd>(const Packet1cd& a) { return Packet1cd(pnot(a.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
@@ -442,15 +447,13 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)
{
__m128 eq = _mm_cmpeq_ps(a.v, b.v);
- __m128 real_and_imag_equal = _mm_and_ps(eq, vec4f_swizzle1(eq, 1, 0, 3, 2));
- return Packet2cf(real_and_imag_equal);
+ return Packet2cf(pand(eq, vec4f_swizzle1(eq, 1, 0, 3, 2)));
}
template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)
{
__m128d eq = _mm_cmpeq_pd(a.v, b.v);
- __m128d real_and_imag_equal = _mm_and_pd(eq, vec2d_swizzle1(eq, 1, 0));
- return Packet1cd(real_and_imag_equal);
+ return Packet1cd(pand(eq, vec2d_swizzle1(eq, 1, 0)));
}
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index b8a5497a9..6dd2f8a46 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -378,6 +378,17 @@ template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4
template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return _mm_cmpnge_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pones<Packet4i>(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); }
+template<> EIGEN_STRONG_INLINE Packet4f
+pones<Packet4f>(const Packet4f& a) {
+ Packet4i b = _mm_castps_si128(a);
+ return _mm_castsi128_ps(_mm_cmpeq_epi32(b, b));
+}
+template<> EIGEN_STRONG_INLINE Packet2d
+pones<Packet2d>(const Packet2d& a) {
+ Packet4i b = _mm_castpd_si128(a);
+ return _mm_castsi128_pd(_mm_cmpeq_epi32(b, b));
+}
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }