From 3e975ea978bac4d861bb09328b06f3c316212611 Mon Sep 17 00:00:00 2001
From: Andrew Harp
Date: Wed, 1 Mar 2017 17:59:22 -0800
Subject: Merge changes from github.

Change: 148954491
---
 .../Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h | 98 ++++++++++++++++++++++
 1 file changed, 98 insertions(+)

(limited to 'third_party/eigen3/unsupported/Eigen')

diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
index 98deb1742e..078be83e0d 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
@@ -11,6 +11,13 @@ typedef struct Packet32q8i {
   Packet32q8i(__m256i val) : val(val) {}
 } Packet32q8i;
 
+typedef struct Packet16q16i {
+  __m256i val;
+  operator __m256i() const { return val; }
+  Packet16q16i();
+  Packet16q16i(__m256i val) : val(val) {}
+} Packet16q16i;
+
 typedef struct Packet32q8u {
   __m256i val;
   operator __m256i() const { return val; }
@@ -32,6 +39,13 @@ typedef struct Packet16q8u {
   Packet16q8u(__m128i val) : val(val) {}
 } Packet16q8u;
 
+typedef struct Packet8q16i {
+  __m128i val;
+  operator __m128i() const { return val; }
+  Packet8q16i();
+  Packet8q16i(__m128i val) : val(val) {}
+} Packet8q16i;
+
 typedef struct Packet8q32i {
   __m256i val;
   operator __m256i() const { return val; }
@@ -92,6 +106,28 @@ struct packet_traits<QUInt8> : default_packet_traits {
   };
 };
 template <>
+struct packet_traits<QInt16> : default_packet_traits {
+  typedef Packet16q16i type;
+  typedef Packet8q16i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 16,
+  };
+  enum {
+    HasAdd = 0,
+    HasSub = 0,
+    HasMul = 0,
+    HasNegate = 0,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+template <>
 struct packet_traits<QInt32> : default_packet_traits {
   typedef Packet8q32i type;
   typedef Packet4q32i half;
@@ -122,6 +158,12 @@ struct unpacket_traits<Packet32q8i> {
   enum { size = 32, alignment=Aligned32 };
 };
 template <>
+struct unpacket_traits<Packet16q16i> {
+  typedef QInt16 type;
+  typedef Packet8q16i half;
+  enum { size = 16, alignment=Aligned32 };
+};
+template <>
 struct unpacket_traits<Packet32q8u> {
   typedef QUInt8 type;
   typedef Packet16q8u half;
@@ -146,6 +188,11 @@ EIGEN_STRONG_INLINE Packet32q8u ploadu<Packet32q8u>(const QUInt8* from) {
       reinterpret_cast<const __m256i*>(from));
 }
 template <>
+EIGEN_STRONG_INLINE Packet16q16i ploadu<Packet16q16i>(const QInt16* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(
+      reinterpret_cast<const __m256i*>(from));
+}
+template <>
 EIGEN_STRONG_INLINE Packet8q32i ploadu<Packet8q32i>(const QInt32* from) {
   EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(
       reinterpret_cast<const __m256i*>(from));
@@ -163,6 +210,11 @@ EIGEN_STRONG_INLINE Packet32q8u pload<Packet32q8u>(const QUInt8* from) {
       reinterpret_cast<const __m256i*>(from));
 }
 template <>
+EIGEN_STRONG_INLINE Packet16q16i pload<Packet16q16i>(const QInt16* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(
+      reinterpret_cast<const __m256i*>(from));
+}
+template <>
 EIGEN_STRONG_INLINE Packet8q32i pload<Packet8q32i>(const QInt32* from) {
   EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(
       reinterpret_cast<const __m256i*>(from));
@@ -180,6 +232,11 @@ EIGEN_STRONG_INLINE void pstoreu<QUInt8>(QUInt8* to, const Packet32q8u& from) {
       reinterpret_cast<__m256i*>(to), from.val);
 }
 template <>
+EIGEN_STRONG_INLINE void pstoreu<QInt16>(QInt16* to, const Packet16q16i& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(
+      reinterpret_cast<__m256i*>(to), from.val);
+}
+template <>
 EIGEN_STRONG_INLINE void pstoreu<QInt32>(QInt32* to, const Packet8q32i& from) {
   EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(
       reinterpret_cast<__m256i*>(to), from.val);
@@ -192,6 +249,11 @@ EIGEN_STRONG_INLINE void pstore<QInt32>(QInt32* to, const Packet8q32i& from) {
                                                from.val);
 }
 template <>
+EIGEN_STRONG_INLINE void pstore<QInt16>(QInt16* to, const Packet16q16i& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to),
+                                               from.val);
+}
+template <>
 EIGEN_STRONG_INLINE void pstore<QUInt8>(QUInt8* to, const Packet32q8u& from) {
   EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to),
                                                from.val);
@@ -208,6 +270,10 @@ EIGEN_STRONG_INLINE QInt32 pfirst<Packet8q32i>(const Packet8q32i& a) {
   return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
 }
 template <>
+EIGEN_STRONG_INLINE QInt16 pfirst<Packet16q16i>(const Packet16q16i& a) {
+  return _mm256_extract_epi16(a.val, 0);
+}
+template <>
 EIGEN_STRONG_INLINE QUInt8 pfirst<Packet32q8u>(const Packet32q8u& a) {
   return static_cast<uint8_t>(_mm256_extract_epi8(a.val, 0));
 }
@@ -237,6 +303,10 @@ EIGEN_STRONG_INLINE Packet8q32i padd<Packet8q32i>(const Packet8q32i& a,
   return _mm256_add_epi32(a.val, b.val);
 }
 template <>
+EIGEN_STRONG_INLINE Packet16q16i pset1<Packet16q16i>(const QInt16& from) {
+  return _mm256_set1_epi16(from.value);
+}
+template <>
 EIGEN_STRONG_INLINE Packet8q32i psub<Packet8q32i>(const Packet8q32i& a,
                                                   const Packet8q32i& b) {
   return _mm256_sub_epi32(a.val, b.val);
@@ -264,6 +334,17 @@ EIGEN_STRONG_INLINE Packet8q32i pmax<Packet8q32i>(const Packet8q32i& a,
   return _mm256_max_epi32(a.val, b.val);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet16q16i pmin<Packet16q16i>(const Packet16q16i& a,
+                                                    const Packet16q16i& b) {
+  return _mm256_min_epi16(a.val, b.val);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16q16i pmax<Packet16q16i>(const Packet16q16i& a,
+                                                    const Packet16q16i& b) {
+  return _mm256_max_epi16(a.val, b.val);
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet32q8u pmin<Packet32q8u>(const Packet32q8u& a,
                                                   const Packet32q8u& b) {
@@ -304,6 +385,23 @@ EIGEN_STRONG_INLINE QInt32 predux_max<Packet8q32i>(const Packet8q32i& a) {
       _mm256_max_epi32(tmp, _mm256_shuffle_epi32(tmp, 1)));
 }
 
+template <>
+EIGEN_STRONG_INLINE QInt16 predux_min<Packet16q16i>(const Packet16q16i& a) {
+  __m256i tmp = _mm256_min_epi16(a, _mm256_permute2f128_si256(a, a, 1));
+  tmp =
+      _mm256_min_epi16(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
+  tmp = _mm256_min_epi16(tmp, _mm256_shuffle_epi32(tmp, 1));
+  return std::min(_mm256_extract_epi16(tmp, 0), _mm256_extract_epi16(tmp, 1));
+}
+template <>
+EIGEN_STRONG_INLINE QInt16 predux_max<Packet16q16i>(const Packet16q16i& a) {
+  __m256i tmp = _mm256_max_epi16(a, _mm256_permute2f128_si256(a, a, 1));
+  tmp =
+      _mm256_max_epi16(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2)));
+  tmp = _mm256_max_epi16(tmp, _mm256_shuffle_epi32(tmp, 1));
+  return std::max(_mm256_extract_epi16(tmp, 0), _mm256_extract_epi16(tmp, 1));
+}
+
 template <>
 EIGEN_STRONG_INLINE QUInt8 predux_min<Packet32q8u>(const Packet32q8u& a) {
   __m256i tmp = _mm256_min_epu8(a, _mm256_permute2f128_si256(a, a, 1));
-- 
cgit v1.2.3