diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-01-09 16:17:08 -0800 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-01-09 16:17:08 -0800 |
commit | cb955df9a6fd5cb2673a7a15172609ce2dafdde8 (patch) | |
tree | 0be65a82aa8cfc22c6fe18856217f888cff16082 /Eigen/src/Core/arch/GPU | |
parent | cb3c059fa4449f7ea1344ea8c677d8b427f6a273 (diff) |
Add packet up "pones". Write pnot(a) as pxor(pones(a), a).
Diffstat (limited to 'Eigen/src/Core/arch/GPU')
-rw-r--r-- | Eigen/src/Core/arch/GPU/PacketMathHalf.h | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h index 3e35f96cc..c4dfedcf8 100644 --- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h +++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h @@ -143,6 +143,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& return result; } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pones<half2>(const half2& a) { + half2 result; + *(reinterpret_cast<unsigned*>(&(result))) = 0xffffffffu; +} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<half2,2>& kernel) { @@ -640,6 +644,14 @@ EIGEN_STRONG_INLINE Packet16h float2half(const Packet16f& a) { #endif } +template<> EIGEN_STRONG_INLINE Packet16h pnot(const Packet16h& a) { + Packet16h r; r.x = _mm256_xor_si256(a.x, pcmp_eq(a.x, a.x)); return r; +} + +template<> EIGEN_STRONG_INLINE Packet16h pones(const Packet16h& a) { + Packet16h r; r.x = Packet8i(pones(a.x)); return r; +} + template<> EIGEN_STRONG_INLINE Packet16h por(const Packet16h& a,const Packet16h& b) { // in some cases Packet8i is a wrapper around __m256i, so we need to // cast to Packet8i to call the correct overload. @@ -1085,6 +1097,10 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) { #endif } +template<> EIGEN_STRONG_INLINE Packet8h pones(const Packet8h& a) { + Packet8h r; r.x = _mm_cmpeq_epi32(a.x, a.x); return r; +} + template<> EIGEN_STRONG_INLINE Packet8h por(const Packet8h& a,const Packet8h& b) { // in some cases Packet4i is a wrapper around __m128i, so we either need to // cast to Packet4i to directly call the intrinsics as below: |