aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/GPU
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-01-09 16:17:08 -0800
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-01-09 16:17:08 -0800
commitcb955df9a6fd5cb2673a7a15172609ce2dafdde8 (patch)
tree0be65a82aa8cfc22c6fe18856217f888cff16082 /Eigen/src/Core/arch/GPU
parentcb3c059fa4449f7ea1344ea8c677d8b427f6a273 (diff)
Add packet up "pones". Write pnot(a) as pxor(pones(a), a).
Diffstat (limited to 'Eigen/src/Core/arch/GPU')
-rw-r--r--Eigen/src/Core/arch/GPU/PacketMathHalf.h16
1 files changed, 16 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
index 3e35f96cc..c4dfedcf8 100644
--- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
@@ -143,6 +143,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs<half2>(const half2&
return result;
}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pones<half2>(const half2& a) {
+ half2 result;
+ *(reinterpret_cast<unsigned*>(&(result))) = 0xffffffffu;
+}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<half2,2>& kernel) {
@@ -640,6 +644,14 @@ EIGEN_STRONG_INLINE Packet16h float2half(const Packet16f& a) {
#endif
}
+template<> EIGEN_STRONG_INLINE Packet16h pnot(const Packet16h& a) {
+ Packet16h r; r.x = _mm256_xor_si256(a.x, pcmp_eq(a.x, a.x)); return r;
+}
+
+template<> EIGEN_STRONG_INLINE Packet16h pones(const Packet16h& a) {
+ Packet16h r; r.x = Packet8i(pones(a.x)); return r;
+}
+
template<> EIGEN_STRONG_INLINE Packet16h por(const Packet16h& a,const Packet16h& b) {
// in some cases Packet8i is a wrapper around __m256i, so we need to
// cast to Packet8i to call the correct overload.
@@ -1085,6 +1097,10 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
#endif
}
+template<> EIGEN_STRONG_INLINE Packet8h pones(const Packet8h& a) {
+ Packet8h r; r.x = _mm_cmpeq_epi32(a.x, a.x); return r;
+}
+
template<> EIGEN_STRONG_INLINE Packet8h por(const Packet8h& a,const Packet8h& b) {
// in some cases Packet4i is a wrapper around __m128i, so we either need to
// cast to Packet4i to directly call the intrinsics as below: