aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/CUDA
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-10-06 10:41:03 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-10-06 10:41:03 -0700
commitd485d12c51bc46286f7439377e3ab591f67ddbbf (patch)
treedff9b7c1ebeee65ece78e9984948f54192b56214 /Eigen/src/Core/arch/CUDA
parent80b513378948f78bc7729c431eb68ca5513a1d62 (diff)
Added missing AVX intrinsics for fp16: in particular, implemented predux which is required by the matrix-vector code.
Diffstat (limited to 'Eigen/src/Core/arch/CUDA')
-rw-r--r--Eigen/src/Core/arch/CUDA/PacketMathHalf.h24
1 files changed, 24 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index 82dfc12c9..9dd89e07f 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -492,6 +492,30 @@ template<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet8h>(Eigen::half*
to[stride*7].x = aux[7].x;
}
+template<> EIGEN_STRONG_INLINE Eigen::half predux<Packet8h>(const Packet8h& a) {
+ Packet8f af = half2float(a);
+ float reduced = predux<Packet8f>(af);
+ return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_max<Packet8h>(const Packet8h& a) {
+ Packet8f af = half2float(a);
+ float reduced = predux_max<Packet8f>(af);
+ return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_min<Packet8h>(const Packet8h& a) {
+ Packet8f af = half2float(a);
+ float reduced = predux_min<Packet8f>(af);
+ return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8h>(const Packet8h& a) {
+ Packet8f af = half2float(a);
+ float reduced = predux_mul<Packet8f>(af);
+ return Eigen::half(reduced);
+}
+
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet8h,8>& kernel) {
__m128i a = kernel.packet[0].x;