aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2018-07-06 17:43:11 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2018-07-06 17:43:11 +0200
commitf2dc048df9dddda48cf8945f3a0a61325ffa6c97 (patch)
tree4a9e4e1b45f6875cfb1281d1ea534a41b0947875 /Eigen
parenta937c5020889f95aadccefc79338ed83c2ff8784 (diff)
complete implementation of Packet16h (AVX512)
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/arch/CUDA/PacketMathHalf.h55
1 files changed, 54 insertions, 1 deletions
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index 2ee92b4f6..c068351ce 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -486,6 +486,13 @@ EIGEN_STRONG_INLINE Packet16h float2half(const Packet16f& a) {
#endif
}
+template<> EIGEN_STRONG_INLINE Packet16h pnegate(const Packet16h& a) {
+ // FIXME we could do that with bit manipulation
+ Packet16f af = half2float(a);
+ Packet16f rf = pnegate(af);
+ return float2half(rf);
+}
+
template<> EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
Packet16f af = half2float(a);
Packet16f bf = half2float(b);
@@ -512,6 +519,51 @@ template<> EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& from) {
return half(predux(from_float));
}
+template<> EIGEN_STRONG_INLINE Packet16h preduxp<Packet16h>(const Packet16h* p) {
+ Packet16f pf[16];
+ pf[0] = half2float(p[0]);
+ pf[1] = half2float(p[1]);
+ pf[2] = half2float(p[2]);
+ pf[3] = half2float(p[3]);
+ pf[4] = half2float(p[4]);
+ pf[5] = half2float(p[5]);
+ pf[6] = half2float(p[6]);
+ pf[7] = half2float(p[7]);
+ pf[8] = half2float(p[8]);
+ pf[9] = half2float(p[9]);
+ pf[10] = half2float(p[10]);
+ pf[11] = half2float(p[11]);
+ pf[12] = half2float(p[12]);
+ pf[13] = half2float(p[13]);
+ pf[14] = half2float(p[14]);
+ pf[15] = half2float(p[15]);
+ Packet16f reduced = preduxp<Packet16f>(pf);
+ return float2half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Packet16h preverse(const Packet16h& a)
+{
+ __m128i m = _mm_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);
+ Packet16h res;
+ res.x = _mm256_set_m128i(_mm_shuffle_epi8(_mm256_extractf128_si256(a.x,0),m),
+ _mm_shuffle_epi8(_mm256_extractf128_si256(a.x,1),m));
+ return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet16h pinsertfirst(const Packet16h& a, Eigen::half b)
+{
+ Packet16h res;
+ res.x = _mm256_insert_epi16(a.x,b.x,0);
+ return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet16h pinsertlast(const Packet16h& a, Eigen::half b)
+{
+ Packet16h res;
+ res.x = _mm256_insert_epi16(a.x,b.x,15);
+ return res;
+}
+
template<> EIGEN_STRONG_INLINE Packet16h pgather<Eigen::half, Packet16h>(const Eigen::half* from, Index stride)
{
Packet16h result;
@@ -854,6 +906,7 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
template<> EIGEN_STRONG_INLINE Packet8h pconj(const Packet8h& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet8h pnegate(const Packet8h& a) {
+ // FIXME we could do that with bit manipulation
Packet8f af = half2float(a);
Packet8f rf = pnegate(af);
return float2half(rf);
@@ -957,7 +1010,7 @@ template<> EIGEN_STRONG_INLINE Packet8h pinsertfirst(const Packet8h& a, Eigen::h
template<> EIGEN_STRONG_INLINE Packet8h pinsertlast(const Packet8h& a, Eigen::half b)
{
Packet8h res;
- res.x = _mm_insert_epi16(a.x,int(b.x),15);
+ res.x = _mm_insert_epi16(a.x,int(b.x),7);
return res;
}