diff options
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 29 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/Complex.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 19 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/Complex.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/PacketMath.h | 49 | ||||
-rw-r--r-- | Eigen/src/Core/arch/GPU/PacketMathHalf.h | 16 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 11 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 11 |
8 files changed, 102 insertions, 53 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 8bdf16e16..8bcceaa7b 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -214,17 +214,13 @@ pxor(const Packet& a, const Packet& b) { return a ^ b; } template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) { return a & (~b); } -/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ +/** \internal \returns ones */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pset1(const typename unpacket_traits<Packet>::type& a) { return a; } +pones(const Packet& /*a*/) { Packet b; memset(&b, 0xff, sizeof(b)); return b;} /** \internal \returns the bitwise not of \a a */ -template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pnot(const Packet& a) { - typedef typename unpacket_traits<Packet>::type Scalar; - Packet ones = pset1<Packet>(Scalar(1)); - return pandnot(ones, a); -} +template <typename Packet> EIGEN_DEVICE_FUNC inline Packet +pnot(const Packet& a) { return pxor(pones(a), a);} /** \internal \returns \a a shifted by N bits to the right */ template<int N> EIGEN_DEVICE_FUNC inline int @@ -262,24 +258,19 @@ pselect(const Packet& mask, const Packet& a, const Packet& b) { /** \internal \returns a <= b as a bit mask */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pcmp_le(const Packet& a, const Packet& b); /* { return a<=b ? pnot(pxor(a,a)) : pxor(a,a); } */ +pcmp_le(const Packet& a, const Packet& b) { return a<=b ? pones(a) : pzero(a); } /** \internal \returns a < b as a bit mask */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pcmp_lt(const Packet& a, const Packet& b); /* { return a<b ? pnot(pxor(a,a)) : pxor(a,a); } */ +pcmp_lt(const Packet& a, const Packet& b) { return a<b ? pones(a) : pzero(a); } /** \internal \returns a == b as a bit mask */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pcmp_eq(const Packet& a, const Packet& b) -{ - typedef typename unpacket_traits<Packet>::type Scalar; - Packet zeros = pset1<Packet>(Scalar(0)); - return a==b ? pnot(zeros) : zeros; -} +pcmp_eq(const Packet& a, const Packet& b) { return a==b ? pones(a) : pzero(a); } /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pcmp_lt_or_nan(const Packet& a, const Packet& b); /* { return pnot(pcmp_le(b,a)); } */ +pcmp_lt_or_nan(const Packet& a, const Packet& b) { return pnot(pcmp_le(b,a)); } /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet @@ -289,6 +280,10 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; } template<typename Packet> EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; } +/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ +template<typename Packet> EIGEN_DEVICE_FUNC inline Packet +pset1(const typename unpacket_traits<Packet>::type& a) { return a; } + /** \internal \returns a packet with constant coefficients set from bits */ template<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a); diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index 23687c624..9f1bb969e 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -72,10 +72,11 @@ template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, con template <> EIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) { __m256 eq = _mm256_cmp_ps(a.v, b.v, _CMP_EQ_OQ); - __m256 real_and_imag_equal = _mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)); - return Packet4cf(real_and_imag_equal); + return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1))); } +template<> EIGEN_STRONG_INLINE Packet4cf pones<Packet4cf>(const Packet4cf& a) { return Packet4cf(pones(a.v)); } +template<> EIGEN_STRONG_INLINE Packet4cf pnot<Packet4cf>(const Packet4cf& a) { return Packet4cf(pnot(a.v)); } template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); } @@ -286,10 +287,11 @@ template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, con template <> EIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) { __m256d eq = _mm256_cmp_pd(a.v, b.v, _CMP_EQ_OQ); - __m256d real_and_imag_equal = _mm256_and_pd(eq, _mm256_permute_pd(eq, 0x5)); - return Packet2cd(real_and_imag_equal); + return Packet2cd(pand(eq, _mm256_permute_pd(eq, 0x5))); } +template<> EIGEN_STRONG_INLINE Packet2cd pones<Packet2cd>(const Packet2cd& a) { return Packet2cd(pones(a.v)); } +template<> EIGEN_STRONG_INLINE Packet2cd pnot<Packet2cd>(const Packet2cd& a) { return Packet2cd(pnot(a.v)); } template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); } diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index a6af48f21..f6a514fbf 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -250,6 +250,25 @@ template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { ret template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); } template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); } + +#ifdef EIGEN_VECTORIZE_AVX2 +template<> EIGEN_STRONG_INLINE Packet8i pones<Packet8i>(const Packet8i& a) { + return _mm256_cmpeq_epi64(a,a); +} +#else +template<> EIGEN_STRONG_INLINE Packet8i pones<Packet8i>(const Packet8i& /*a*/) { + const unsigned int o = 0xffffffffu; + return _mm256_set_epi32(o, o, o, o, o, o, o, o); +} +#endif +template<> EIGEN_STRONG_INLINE Packet8f pones<Packet8f>(const Packet8f& a) { + return _mm256_castsi256_ps(pones<Packet8i>(_mm256_castps_si256(a))); +} + +template<> EIGEN_STRONG_INLINE Packet4d pones<Packet4d>(const Packet4d& a) { + return _mm256_castsi256_pd(pones<Packet8i>(_mm256_castpd_si256(a))); +} + template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet8i pand<Packet8i>(const Packet8i& a, const Packet8i& b) { diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index 2c613f870..154fedc25 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -83,10 +83,7 @@ template<> EIGEN_STRONG_INLINE Packet8cf pandnot<Packet8cf>(const Packet8cf& a, template <> EIGEN_STRONG_INLINE Packet8cf pcmp_eq(const Packet8cf& a, const Packet8cf& b) { __m512 eq = pcmp_eq<Packet16f>(a.v, b.v); - __m512 eq_swap_real_imag = _mm512_permute_ps(eq, 0xB1); - __m512i real_and_imag_equal = _mm512_and_si512( - _mm512_castps_si512(eq), _mm512_castps_si512(eq_swap_real_imag)); - return Packet8cf(_mm512_castsi512_ps(real_and_imag_equal)); + return Packet8cf(pand(eq, _mm512_permute_ps(eq, 0xB1))); } template<> EIGEN_STRONG_INLINE Packet8cf pload <Packet8cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet8cf(pload<Packet16f>(&numext::real_ref(*from))); } @@ -279,10 +276,7 @@ template<> EIGEN_STRONG_INLINE Packet4cd pandnot<Packet4cd>(const Packet4cd& a, template <> EIGEN_STRONG_INLINE Packet4cd pcmp_eq(const Packet4cd& a, const Packet4cd& b) { __m512d eq = pcmp_eq<Packet8d>(a.v, b.v); - __m512d eq_swap_real_imag = _mm512_permute_pd(eq, 0x55); - __m512i real_and_imag_equal = _mm512_and_si512( - _mm512_castpd_si512(eq), _mm512_castpd_si512(eq_swap_real_imag)); - return Packet4cd(_mm512_castsi512_pd(real_and_imag_equal)); + return Packet4cd(pand(eq, _mm512_permute_pd(eq, 0x55))); } template<> EIGEN_STRONG_INLINE Packet4cd pload <Packet4cd>(const std::complex<double>* from) diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 68adf5e57..9666c4e22 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -284,47 +284,56 @@ EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) { #endif template<> EIGEN_STRONG_INLINE Packet16f pcmp_le(const Packet16f& a, const Packet16f& b) { - __m256 lo = pcmp_le(extract256<0>(a), extract256<0>(b)); - __m256 hi = pcmp_le(extract256<1>(a), extract256<1>(b)); - return cat256(lo, hi); + __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ); + return _mm512_castsi512_ps( + _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu)); } template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt(const Packet16f& a, const Packet16f& b) { - __m256 lo = pcmp_lt(extract256<0>(a), extract256<0>(b)); - __m256 hi = pcmp_lt(extract256<1>(a), extract256<1>(b)); - return cat256(lo, hi); -} - -template<> EIGEN_STRONG_INLINE Packet16f pcmp_eq(const Packet16f& a, const Packet16f& b) { - __m256 lo = pcmp_eq(extract256<0>(a), extract256<0>(b)); - __m256 hi = pcmp_eq(extract256<1>(a), extract256<1>(b)); - return cat256(lo, hi); + __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ); + return _mm512_castsi512_ps( + _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu)); } template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) { - __m256 lo = pcmp_lt_or_nan(extract256<0>(a), extract256<0>(b)); - __m256 hi = pcmp_lt_or_nan(extract256<1>(a), extract256<1>(b)); - return cat256(lo, hi); + __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_NGT_UQ); + return _mm512_castsi512_ps( + _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu)); } template<> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) { - __m256i lo = _mm256_cmpeq_epi32(_mm512_extracti64x4_epi64(a, 0), _mm512_extracti64x4_epi64(b, 0)); - __m256i hi = _mm256_cmpeq_epi32(_mm512_extracti64x4_epi64(a, 1), _mm512_extracti64x4_epi64(b, 1)); - return _mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1); + __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _CMP_EQ_OQ); + return _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu); } template <> EIGEN_STRONG_INLINE Packet16f pcmp_eq(const Packet16f& a, const Packet16f& b) { __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ); return _mm512_castsi512_ps( - _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffff)); + _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu)); } template <> EIGEN_STRONG_INLINE Packet8d pcmp_eq(const Packet8d& a, const Packet8d& b) { __mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ); return _mm512_castsi512_pd( - _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffff)); + _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu)); +} + +template <> +EIGEN_STRONG_INLINE Packet16i pones<Packet16i>(const Packet16i& /*a*/) { + const unsigned int o = 0xffffffffu; + return _mm512_set_epi32(o, o, o, o, o, o, o, o, o, o, o, o, o, o, o, o); +} + +template <> +EIGEN_STRONG_INLINE Packet16f pones<Packet16f>(const Packet16f& a) { + return _mm512_castsi512_ps(pones<Packet16i>(_mm512_castps_si512(a))); +} + +template <> +EIGEN_STRONG_INLINE Packet8d pones<Packet8d>(const Packet8d& a) { + return _mm512_castsi512_pd(pones<Packet16i>(_mm512_castpd_si512(a))); } template <> diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h index 3e35f96cc..c4dfedcf8 100644 --- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h +++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h @@ -143,6 +143,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& return result; } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pones<half2>(const half2& a) { + half2 result; + *(reinterpret_cast<unsigned*>(&(result))) = 0xffffffffu; +} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<half2,2>& kernel) { @@ -640,6 +644,14 @@ EIGEN_STRONG_INLINE Packet16h float2half(const Packet16f& a) { #endif } +template<> EIGEN_STRONG_INLINE Packet16h pnot(const Packet16h& a) { + Packet16h r; r.x = _mm256_xor_si256(a.x, pcmp_eq(a.x, a.x)); return r; +} + +template<> EIGEN_STRONG_INLINE Packet16h pones(const Packet16h& a) { + Packet16h r; r.x = Packet8i(pones(a.x)); return r; +} + template<> EIGEN_STRONG_INLINE Packet16h por(const Packet16h& a,const Packet16h& b) { // in some cases Packet8i is a wrapper around __m256i, so we need to // cast to Packet8i to call the correct overload. @@ -1085,6 +1097,10 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) { #endif } +template<> EIGEN_STRONG_INLINE Packet8h pones(const Packet8h& a) { + Packet8h r; r.x = _mm_cmpeq_epi32(a.x, a.x); return r; +} + template<> EIGEN_STRONG_INLINE Packet8h por(const Packet8h& a,const Packet8h& b) { // in some cases Packet4i is a wrapper around __m128i, so we either need to // cast to Packet4i to directly call the intrinsics as below: diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index a7304193b..fa84097ac 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -82,6 +82,9 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con #endif } +template<> EIGEN_STRONG_INLINE Packet2cf pones <Packet2cf>(const Packet2cf& a) { return Packet2cf(pones(a.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pnot <Packet2cf>(const Packet2cf& a) { return Packet2cf(pnot(a.v)); } + template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); } @@ -305,6 +308,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con #endif } +template<> EIGEN_STRONG_INLINE Packet1cd pones <Packet1cd>(const Packet1cd& a) { return Packet1cd(pones(a.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pnot <Packet1cd>(const Packet1cd& a) { return Packet1cd(pnot(a.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); } @@ -442,15 +447,13 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) { template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) { __m128 eq = _mm_cmpeq_ps(a.v, b.v); - __m128 real_and_imag_equal = _mm_and_ps(eq, vec4f_swizzle1(eq, 1, 0, 3, 2)); - return Packet2cf(real_and_imag_equal); + return Packet2cf(pand(eq, vec4f_swizzle1(eq, 1, 0, 3, 2))); } template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) { __m128d eq = _mm_cmpeq_pd(a.v, b.v); - __m128d real_and_imag_equal = _mm_and_pd(eq, vec2d_swizzle1(eq, 1, 0)); - return Packet1cd(real_and_imag_equal); + return Packet1cd(pand(eq, vec2d_swizzle1(eq, 1, 0))); } template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) { diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index b8a5497a9..6dd2f8a46 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -378,6 +378,17 @@ template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4 template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return _mm_cmpnge_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pones<Packet4i>(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); } +template<> EIGEN_STRONG_INLINE Packet4f +pones<Packet4f>(const Packet4f& a) { + Packet4i b = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_cmpeq_epi32(b, b)); +} +template<> EIGEN_STRONG_INLINE Packet2d +pones<Packet2d>(const Packet2d& a) { + Packet4i b = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_cmpeq_epi32(b, b)); +} template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); } |