about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-29 13:30:13 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-29 13:30:13 -0700
commit: fa5a8f055aebbf4f39fca26e857351103fab4d11 (patch)
tree: 87dc033bdfafd8ad1ff3ee409dd1cfb4eb10b158
parent: ef3ac9d05a7a6b06ead65b5baafb66918fb2031b (diff)
Implemented palign_impl for AVX512
-rw-r--r-- Eigen/src/Core/arch/AVX512/PacketMath.h 38
1 file changed, 34 insertions, 4 deletions
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 8392c4673..68e86ae7c 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -54,7 +54,6 @@ template<> struct packet_traits<float> : default_packet_traits
AlignedOnScalar = 1,
size = 16,
HasHalfPacket = 1,
- HasDiv = 1,
#if EIGEN_GNUC_AT_LEAST(5, 3)
#ifdef EIGEN_VECTORIZE_AVX512DQ
HasLog = 1,
@@ -63,6 +62,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasSqrt = 1,
HasRsqrt = 1,
#endif
+ HasDiv = 1
};
};
template<> struct packet_traits<double> : default_packet_traits
@@ -997,9 +997,26 @@ EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
template <int Offset>
struct palign_impl<Offset, Packet16f> {
- static EIGEN_STRONG_INLINE void run(Packet16f& first, const Packet16f& second) {
+ static EIGEN_STRONG_INLINE void run(Packet16f& first,
+ const Packet16f& second) {
if (Offset != 0) {
- assert(false && "To be implemented");
+ __m512i first_idx = _mm512_set_epi32(
+ Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
+ Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
+ Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
+
+ __m512i second_idx =
+ _mm512_set_epi32(Offset - 1, Offset - 2, Offset - 3, Offset - 4,
+ Offset - 5, Offset - 6, Offset - 7, Offset - 8,
+ Offset - 9, Offset - 10, Offset - 11, Offset - 12,
+ Offset - 13, Offset - 14, Offset - 15, Offset - 16);
+
+ unsigned short mask = 0xFFFF;
+ mask <<= (16 - Offset);
+
+ first = _mm512_permutexvar_ps(first_idx, first);
+ Packet16f tmp = _mm512_permutexvar_ps(second_idx, second);
+ first = _mm512_mask_blend_ps(mask, first, tmp);
}
}
};
@@ -1007,7 +1024,20 @@ template <int Offset>
struct palign_impl<Offset, Packet8d> {
static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
if (Offset != 0) {
- assert(false && "To be implemented");
+ __m512i first_idx = _mm512_set_epi32(
+ 0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
+ Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
+
+ __m512i second_idx = _mm512_set_epi32(
+ 0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
+ Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);
+
+ unsigned char mask = 0xFF;
+ mask <<= (8 - Offset);
+
+ first = _mm512_permutexvar_pd(first_idx, first);
+ Packet8d tmp = _mm512_permutexvar_pd(second_idx, second);
+ first = _mm512_mask_blend_pd(mask, first, tmp);
}
}
};