about | summary | refs | log | tree | commit | diff | homepage
path: root/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h')
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h | 53
1 files changed, 43 insertions, 10 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
index 26735743d4..a09eac6707 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
@@ -1,5 +1,5 @@
-#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
-#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
+#ifndef CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
+#define CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
namespace Eigen {
namespace internal {
@@ -132,8 +132,15 @@ pcast<Packet16q32i, Packet64q8i>(const Packet16q32i& a,
const Packet16q32i& b,
const Packet16q32i& c,
const Packet16q32i& d) {
- __m512i converted = _mm512_packs_epi16(_mm512_packs_epi32(a.val, b.val),
- _mm512_packs_epi32(c.val, d.val));
+ __m128i a_part = _mm512_cvtsepi32_epi8(a);
+ __m128i b_part = _mm512_cvtsepi32_epi8(b);
+ __m128i c_part = _mm512_cvtsepi32_epi8(c);
+ __m128i d_part = _mm512_cvtsepi32_epi8(d);
+ __m256i ab =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(a_part), b_part, 1);
+ __m256i cd =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(c_part), d_part, 1);
+ __m512i converted = _mm512_inserti64x4(_mm512_castsi256_si512(ab), cd, 1);
return converted;
}
@@ -141,7 +148,10 @@ template <>
EIGEN_STRONG_INLINE Packet32q16i
pcast<Packet16q32i, Packet32q16i>(const Packet16q32i& a,
const Packet16q32i& b) {
- __m512i converted = _mm512_packs_epi32(a.val, b.val);
+ __m256i a_part = _mm512_cvtsepi32_epi16(a);
+ __m256i b_part = _mm512_cvtsepi32_epi16(b);
+ __m512i converted =
+ _mm512_inserti64x4(_mm512_castsi256_si512(a_part), b_part, 1);
return converted;
}
@@ -154,22 +164,45 @@ template <>
EIGEN_STRONG_INLINE Packet64q8u
pcast<Packet16q32i, Packet64q8u>(const Packet16q32i& a, const Packet16q32i& b,
const Packet16q32i& c, const Packet16q32i& d) {
- const __m512i converted = _mm512_packus_epi16(
- _mm512_packus_epi32(a.val, b.val), _mm512_packus_epi32(c.val, d.val));
+ // Brute-force saturation since there isn't a pack operation for unsigned
+ // numbers that keeps the elements in order.
+ __m128i a_part = _mm512_cvtepi32_epi8(_mm512_max_epi32(
+ _mm512_min_epi32(a, _mm512_set1_epi32(255)), _mm512_setzero_si512()));
+ __m128i b_part = _mm512_cvtepi32_epi8(_mm512_max_epi32(
+ _mm512_min_epi32(b, _mm512_set1_epi32(255)), _mm512_setzero_si512()));
+ __m128i c_part = _mm512_cvtepi32_epi8(_mm512_max_epi32(
+ _mm512_min_epi32(c, _mm512_set1_epi32(255)), _mm512_setzero_si512()));
+ __m128i d_part = _mm512_cvtepi32_epi8(_mm512_max_epi32(
+ _mm512_min_epi32(d, _mm512_set1_epi32(255)), _mm512_setzero_si512()));
+ __m256i ab =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(a_part), b_part, 1);
+ __m256i cd =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(c_part), d_part, 1);
+ __m512i converted = _mm512_inserti64x4(_mm512_castsi256_si512(ab), cd, 1);
return converted;
}
+#if 0
+// The type Packet32q16u does not exist for AVX-512 yet
template <>
struct type_casting_traits<QInt32, QUInt16> {
enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
};
-#if 0
template <>
EIGEN_STRONG_INLINE Packet32q16u
pcast<Packet16q32i, Packet32q16u>(const Packet16q32i& a,
const Packet16q32i& b) {
- const __m512i converted = _mm512_packus_epi32(a.val, b.val);
+ // Brute-force saturation since there isn't a pack operation for unsigned
+ // numbers that keeps the elements in order.
+ __m256i a_part =
+ _mm512_cvtepi32_epi16(_mm512_max_epi32(
+ _mm512_min_epi32(a, _mm512_set1_epi32(65535)), _mm512_setzero_si512()));
+ __m256i b_part = _mm512_cvtepi32_epi16(
+ _mm512_max_epi32(_mm512_min_epi32(b, _mm512_set1_epi32(65535)),
+ _mm512_setzero_si512()));
+ __m512i converted =
+ _mm512_inserti64x4(_mm512_castsi256_si512(a_part), b_part, 1);
return converted;
}
#endif
@@ -177,4 +210,4 @@ pcast<Packet16q32i, Packet32q16u>(const Packet16q32i& a,
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
+#endif // CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_