diff options
Diffstat (limited to 'third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h')
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h | 16 |
1 files changed, 12 insertions, 4 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h index 7b4ecc752f..9561d6a338 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h @@ -1,5 +1,5 @@ -#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ -#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ +#ifndef CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ +#define CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ namespace Eigen { namespace internal { @@ -52,8 +52,16 @@ template <> EIGEN_STRONG_INLINE Packet32q8u pcast<Packet8q32i, Packet32q8u>(const Packet8q32i& a, const Packet8q32i& b, const Packet8q32i& c, const Packet8q32i& d) { + // _mm256_packus_epi32 trims negative numbers to 0 but we can't allow numbers + // that are too large because _mm256_packus_epi16 expects signed input + // (example of problem input: 0x11111111, which saturates to 0xffff = -1, + // which saturates to 0). + const __m256i a_clip = _mm256_min_epi32(a, _mm256_set1_epi32(255)); + const __m256i b_clip = _mm256_min_epi32(b, _mm256_set1_epi32(255)); + const __m256i c_clip = _mm256_min_epi32(c, _mm256_set1_epi32(255)); + const __m256i d_clip = _mm256_min_epi32(d, _mm256_set1_epi32(255)); const __m256i converted = _mm256_packus_epi16( - _mm256_packs_epi32(a.val, b.val), _mm256_packs_epi32(c.val, d.val)); + _mm256_packus_epi32(a_clip, b_clip), _mm256_packus_epi32(c_clip, d_clip)); // Since packus does not cross 128 bit lane boundaries, // we have to permute to properly order the final result. const __m256i permute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); @@ -63,4 +71,4 @@ pcast<Packet8q32i, Packet32q8u>(const Packet8q32i& a, const Packet8q32i& b, } // end namespace internal } // end namespace Eigen -#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ +#endif // CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ |