diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-10-06 10:41:03 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-10-06 10:41:03 -0700 |
commit | d485d12c51bc46286f7439377e3ab591f67ddbbf (patch) | |
tree | dff9b7c1ebeee65ece78e9984948f54192b56214 /Eigen/src | |
parent | 80b513378948f78bc7729c431eb68ca5513a1d62 (diff) |
Added missing AVX intrinsics for fp16: in particular, implemented predux which is required by the matrix-vector code.
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index 82dfc12c9..9dd89e07f 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -492,6 +492,30 @@ template<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet8h>(Eigen::half*
   to[stride*7].x = aux[7].x;
 }
 
+template<> EIGEN_STRONG_INLINE Eigen::half predux<Packet8h>(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux<Packet8f>(af);
+  return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_max<Packet8h>(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux_max<Packet8f>(af);
+  return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_min<Packet8h>(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux_min<Packet8f>(af);
+  return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8h>(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux_mul<Packet8f>(af);
+  return Eigen::half(reduced);
+}
+
 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8h,8>& kernel) {
   __m128i a = kernel.packet[0].x;