aboutsummaryrefslogtreecommitdiffhomepage
path: root/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h')
-rw-r--r--third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h16
1 files changed, 12 insertions, 4 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
index 7b4ecc752f..9561d6a338 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
@@ -1,5 +1,5 @@
-#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
-#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
+#ifndef CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
+#define CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
namespace Eigen {
namespace internal {
@@ -52,8 +52,16 @@ template <>
EIGEN_STRONG_INLINE Packet32q8u
pcast<Packet8q32i, Packet32q8u>(const Packet8q32i& a, const Packet8q32i& b,
const Packet8q32i& c, const Packet8q32i& d) {
+ // _mm256_packus_epi32 clamps negative inputs to 0, but overly large inputs
+ // must be clipped first because _mm256_packus_epi16 expects signed input
+ // (example of problem input: 0x11111111, which packus_epi32 saturates to
+ // 0xffff, i.e. -1 as int16, which packus_epi16 then saturates to 0).
+ const __m256i a_clip = _mm256_min_epi32(a, _mm256_set1_epi32(255));
+ const __m256i b_clip = _mm256_min_epi32(b, _mm256_set1_epi32(255));
+ const __m256i c_clip = _mm256_min_epi32(c, _mm256_set1_epi32(255));
+ const __m256i d_clip = _mm256_min_epi32(d, _mm256_set1_epi32(255));
const __m256i converted = _mm256_packus_epi16(
- _mm256_packs_epi32(a.val, b.val), _mm256_packs_epi32(c.val, d.val));
+ _mm256_packus_epi32(a_clip, b_clip), _mm256_packus_epi32(c_clip, d_clip));
// Since packus does not cross 128 bit lane boundaries,
// we have to permute to properly order the final result.
const __m256i permute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
@@ -63,4 +71,4 @@ pcast<Packet8q32i, Packet32q8u>(const Packet8q32i& a, const Packet8q32i& b,
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
+#endif // CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_