about summary refs log tree commit diff homepage
path: root/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h')
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h 51
1 files changed, 40 insertions, 11 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
index 078be83e0d..c210b1712c 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
@@ -1,6 +1,35 @@
#ifndef THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_
#define THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_
+#ifdef _MSC_VER
+
+#include <immintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#endif
+
+// Returns 16-bit lane 0 of X as an int.  Used in this change as the
+// replacement for _mm256_extract_epi16(X, 0).
+// Per the Intel intrinsics documentation _mm_extract_epi16 zero-extends its
+// result, so the value is in [0, 65535]; cast to int16_t to recover a signed
+// lane value.
+inline int _mm256_extract_epi16_N0(const __m256i X) {
+  // Lane 0 sits in the lower 128-bit half (half index 0), at position 0.
+  const __m128i low_half = _mm256_extractf128_si256(X, 0);
+  return _mm_extract_epi16(low_half, 0);
+}
+
+// Returns 16-bit lane 1 of X as an int.  Used in this change as the
+// replacement for _mm256_extract_epi16(X, 1).
+// Per the Intel intrinsics documentation _mm_extract_epi16 zero-extends its
+// result, so the value is in [0, 65535]; cast to int16_t to recover a signed
+// lane value.
+inline int _mm256_extract_epi16_N1(const __m256i X) {
+  // Lane 1 sits in the lower 128-bit half (half index 0), at position 1.
+  const __m128i low_half = _mm256_extractf128_si256(X, 0);
+  return _mm_extract_epi16(low_half, 1);
+}
+
+// Returns 8-bit lane 0 of X as an int.  Used in this change as the
+// replacement for _mm256_extract_epi8(X, 0).
+// Per the Intel intrinsics documentation _mm_extract_epi8 zero-extends its
+// result, so the value is in [0, 255]; cast to int8_t to recover a signed
+// lane value.
+inline int _mm256_extract_epi8_N0(const __m256i X) {
+  // Lane 0 sits in the lower 128-bit half (half index 0), at position 0.
+  const __m128i low_half = _mm256_extractf128_si256(X, 0);
+  return _mm_extract_epi8(low_half, 0);
+}
+
+// Returns 8-bit lane 1 of X as an int.  Used in this change as the
+// replacement for _mm256_extract_epi8(X, 1).
+// Per the Intel intrinsics documentation _mm_extract_epi8 zero-extends its
+// result, so the value is in [0, 255]; cast to int8_t to recover a signed
+// lane value.
+inline int _mm256_extract_epi8_N1(const __m256i X) {
+  // Lane 1 sits in the lower 128-bit half (half index 0), at position 1.
+  const __m128i low_half = _mm256_extractf128_si256(X, 0);
+  return _mm_extract_epi8(low_half, 1);
+}
+
+
namespace Eigen {
namespace internal {
@@ -271,15 +300,15 @@ EIGEN_STRONG_INLINE QInt32 pfirst<Packet8q32i>(const Packet8q32i& a) {
}
// pfirst specialization for Packet16q16i: returns 16-bit lane 0 of a.val as a
// QInt16.  This change routes the extraction through _mm256_extract_epi16_N0
// in place of a direct _mm256_extract_epi16(a.val, 0) call.
// NOTE(review): the helper returns an int; the conversion to QInt16 happens
// implicitly at the return -- confirm against QInt16's constructor.
template <>
EIGEN_STRONG_INLINE QInt16 pfirst<Packet16q16i>(const Packet16q16i& a) {
- return _mm256_extract_epi16(a.val, 0);
+ return _mm256_extract_epi16_N0(a.val);
}
// pfirst specialization for Packet32q8u: returns 8-bit lane 0 of a.val as a
// QUInt8.  This change routes the extraction through _mm256_extract_epi8_N0
// in place of a direct _mm256_extract_epi8(a.val, 0) call; the int result is
// narrowed to uint8_t explicitly before the return.
template <>
EIGEN_STRONG_INLINE QUInt8 pfirst<Packet32q8u>(const Packet32q8u& a) {
- return static_cast<uint8_t>(_mm256_extract_epi8(a.val, 0));
+ return static_cast<uint8_t>(_mm256_extract_epi8_N0(a.val));
}
// pfirst specialization for Packet32q8i: returns 8-bit lane 0 of a.val as a
// QInt8.  This change routes the extraction through _mm256_extract_epi8_N0
// in place of a direct _mm256_extract_epi8(a.val, 0) call.
// NOTE(review): the helper returns an int with no explicit cast here, unlike
// the QUInt8 variant; presumably the conversion to QInt8 keeps only the low
// 8 bits -- confirm against QInt8's constructor.
template <>
EIGEN_STRONG_INLINE QInt8 pfirst<Packet32q8i>(const Packet32q8i& a) {
- return _mm256_extract_epi8(a.val, 0);
+ return _mm256_extract_epi8_N0(a.val);
}
// Initialize to constant value.
@@ -391,7 +420,7 @@ EIGEN_STRONG_INLINE QInt16 predux_min<Packet16q16i>(const Packet16q16i& a) {
tmp =
_mm256_min_epi16(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
tmp = _mm256_min_epi16(tmp, _mm256_shuffle_epi32(tmp, 1));
- return std::min(_mm256_extract_epi16(tmp, 0), _mm256_extract_epi16(tmp, 1));
+ return std::min(_mm256_extract_epi16_N0(tmp), _mm256_extract_epi16_N1(tmp));
}
template <>
EIGEN_STRONG_INLINE QInt16 predux_max<Packet16q16i>(const Packet16q16i& a) {
@@ -399,7 +428,7 @@ EIGEN_STRONG_INLINE QInt16 predux_max<Packet16q16i>(const Packet16q16i& a) {
tmp =
_mm256_max_epi16(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
tmp = _mm256_max_epi16(tmp, _mm256_shuffle_epi32(tmp, 1));
- return std::max(_mm256_extract_epi16(tmp, 0), _mm256_extract_epi16(tmp, 1));
+ return std::max(_mm256_extract_epi16_N0(tmp), _mm256_extract_epi16_N1(tmp));
}
template <>
@@ -410,8 +439,8 @@ EIGEN_STRONG_INLINE QUInt8 predux_min<Packet32q8u>(const Packet32q8u& a) {
tmp = _mm256_min_epu8(tmp, _mm256_shuffle_epi32(tmp, 1));
tmp = _mm256_min_epu8(tmp,
_mm256_shufflelo_epi16(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
- return std::min(static_cast<uint8_t>(_mm256_extract_epi8(tmp, 0)),
- static_cast<uint8_t>(_mm256_extract_epi8(tmp, 1)));
+ return std::min(static_cast<uint8_t>(_mm256_extract_epi8_N0(tmp)),
+ static_cast<uint8_t>(_mm256_extract_epi8_N1(tmp)));
}
template <>
EIGEN_STRONG_INLINE QUInt8 predux_max<Packet32q8u>(const Packet32q8u& a) {
@@ -421,8 +450,8 @@ EIGEN_STRONG_INLINE QUInt8 predux_max<Packet32q8u>(const Packet32q8u& a) {
tmp = _mm256_max_epu8(tmp, _mm256_shuffle_epi32(tmp, 1));
tmp = _mm256_max_epu8(tmp,
_mm256_shufflelo_epi16(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
- return std::max(static_cast<uint8_t>(_mm256_extract_epi8(tmp, 0)),
- static_cast<uint8_t>(_mm256_extract_epi8(tmp, 1)));
+ return std::max(static_cast<uint8_t>(_mm256_extract_epi8_N0(tmp)),
+ static_cast<uint8_t>(_mm256_extract_epi8_N1(tmp)));
}
template <>
@@ -431,7 +460,7 @@ EIGEN_STRONG_INLINE QInt8 predux_min<Packet32q8i>(const Packet32q8i& a) {
tmp = _mm256_min_epi8(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
tmp = _mm256_min_epi8(tmp, _mm256_shuffle_epi32(tmp, 1));
tmp = _mm256_min_epi8(tmp, _mm256_shufflelo_epi16(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
- return std::min(_mm256_extract_epi8(tmp, 0), _mm256_extract_epi8(tmp, 1));
+ return std::min(_mm256_extract_epi8_N0(tmp), _mm256_extract_epi8_N1(tmp));
}
template <>
EIGEN_STRONG_INLINE QInt8 predux_max<Packet32q8i>(const Packet32q8i& a) {
@@ -439,7 +468,7 @@ EIGEN_STRONG_INLINE QInt8 predux_max<Packet32q8i>(const Packet32q8i& a) {
tmp = _mm256_max_epi8(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
tmp = _mm256_max_epi8(tmp, _mm256_shuffle_epi32(tmp, 1));
tmp = _mm256_max_epi8(tmp, _mm256_shufflelo_epi16(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
- return std::max(_mm256_extract_epi8(tmp, 0), _mm256_extract_epi8(tmp, 1));
+ return std::max(_mm256_extract_epi8_N0(tmp), _mm256_extract_epi8_N1(tmp));
}
// Vectorized scaling of Packet32q8i by float.