aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-06-06 20:18:44 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-06-06 20:18:44 -0700
commit29aebf96e62f4fb5e4b1f3fb475e299df2e7a02e (patch)
treedb806c8c2022e8719b83c44ead705da397b4799d /Eigen/src/Core
parent79085e08e9512f678b4584df49d1b2835b40117f (diff)
Created the pblend packet primitive and implemented it using SSE and AVX instructions.
Diffstat (limited to 'Eigen/src/Core')
-rwxr-xr-xEigen/src/Core/GenericPacketMath.h14
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h15
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h8
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h41
4 files changed, 74 insertions, 4 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 98313c68f..0869dd49f 100755
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -54,6 +54,7 @@ struct default_packet_traits
HasMax = 1,
HasConj = 1,
HasSetLinear = 1,
+ HasBlend = 0,
HasDiv = 0,
HasSqrt = 0,
@@ -429,6 +430,19 @@ ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
// Nothing to do in the scalar case, i.e. a 1x1 matrix.
}
+/***************************************************************************
+ * Selector, i.e. vector of N boolean values used to select (i.e. blend)
+ * words from 2 packets.
+***************************************************************************/
+template <size_t N> struct Selector {
+ bool select[N];
+};
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
+ return ifPacket.select[0] ? thenPacket : elsePacket;
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 8b8307d75..688ff91e4 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -59,6 +59,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasLog = 0,
HasExp = 0,
HasSqrt = 0
+ HasBlend = 1,
};
};
template<> struct packet_traits<double> : default_packet_traits
@@ -73,6 +74,7 @@ template<> struct packet_traits<double> : default_packet_traits
HasDiv = 1,
HasExp = 0
+ HasBlend = 1,
};
};
@@ -557,6 +559,19 @@ ptranspose(PacketBlock<Packet4d,4>& kernel) {
kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
}
+template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
+ const __m256 zero = _mm256_setzero_ps();
+ const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
+ return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
+ const __m256d zero = _mm256_setzero_pd();
+ const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
+ return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 758183c18..0bc03cf9e 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -44,7 +44,8 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
- HasSetLinear = 0
+ HasSetLinear = 0,
+ HasBlend = 1
};
};
#endif
@@ -472,6 +473,11 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
kernel.packet[1].v = tmp;
}
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+ __m128d result = pblend(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
+ return Packet2cf(_mm_castpd_ps(result));
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 6912f3bc3..1124b24df 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -108,7 +108,8 @@ template<> struct packet_traits<float> : default_packet_traits
HasCos = EIGEN_FAST_MATH,
HasLog = 1,
HasExp = 1,
- HasSqrt = 1
+ HasSqrt = 1,
+ HasBlend = 1
};
};
template<> struct packet_traits<double> : default_packet_traits
@@ -123,7 +124,8 @@ template<> struct packet_traits<double> : default_packet_traits
HasDiv = 1,
HasExp = 1,
- HasSqrt = 1
+ HasSqrt = 1,
+ HasBlend = 1
};
};
#endif
@@ -135,7 +137,9 @@ template<> struct packet_traits<int> : default_packet_traits
// FIXME check the Has*
Vectorizable = 1,
AlignedOnScalar = 1,
- size=4
+ size=4,
+
+ HasBlend = 1
};
};
@@ -809,6 +813,37 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
}
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m128i false_mask = _mm_cmpeq_epi32(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+ const __m128 zero = _mm_setzero_ps();
+ const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m128 false_mask = _mm_cmpeq_ps(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+ const __m128d zero = _mm_setzero_pd();
+ const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
+ __m128d false_mask = _mm_cmpeq_pd(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
+#endif
+}
+
} // end namespace internal
} // end namespace Eigen