diff options
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 67 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/Complex.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/Complex.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/PacketMath.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 57 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 5 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 175 | ||||
-rw-r--r-- | test/packetmath.cpp | 66 |
9 files changed, 243 insertions, 152 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 449793372..d25b45ab0 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -58,7 +58,6 @@ struct default_packet_traits HasConj = 1, HasSetLinear = 1, HasBlend = 0, - HasInsert = 0, HasDiv = 0, HasSqrt = 0, @@ -191,8 +190,10 @@ psub(const Packet& a, const Packet& b) { return a-b; } template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) { return -a; } -/** \internal \returns conj(a) (coeff-wise) */ +template<> EIGEN_DEVICE_FUNC inline bool +pnegate(const bool& a) { return !a; } +/** \internal \returns conj(a) (coeff-wise) */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) { return numext::conj(a); } @@ -269,38 +270,9 @@ pldexp(const Packet &a, const Packet &exponent) { return ldexp(a, static_cast<int>(exponent)); } -// Notice: The following ops accept and operator on bitwise masks. -// The value of each field in a masks is Scalar(0) or ~Scalar(0). -// For boolean packet like Packet16b, this is different from the -// representation of true and false, which are 1 and 0. -// As an example -// ptrue<Packet16b>() = 0xffffffffffffffffffffffffffffffff -// while -// pset1<Packet16b>(true) = 0x01010101010101010101010101010101 - -/** \internal \returns the bitwise and of \a a and \a b */ +/** \internal \returns zero bits */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pand(const Packet& a, const Packet& b) { return a & b; } - -/** \internal \returns the bitwise or of \a a and \a b */ -template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -por(const Packet& a, const Packet& b) { return a | b; } - -/** \internal \returns the bitwise xor of \a a and \a b */ -template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pxor(const Packet& a, const Packet& b) { return a ^ b; } - -/** \internal \returns the bitwise and of \a a and not \a b */ -template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pandnot(const Packet& a, const Packet& b) { return a & (~b); } - -/** \internal \returns ones */ -template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;} - -/** \internal \returns zeros */ -template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pzero(const Packet& a) { return pxor(a,a); } +pzero(const Packet& /*a*/) { Packet b; memset((void*)&b, 0, sizeof(b)); return b;} template<> EIGEN_DEVICE_FUNC inline float pzero<float>(const float& a) { EIGEN_UNUSED_VARIABLE(a); @@ -312,6 +284,10 @@ template<> EIGEN_DEVICE_FUNC inline double pzero<double>(const double& a) { return 0.; } +/** \internal \returns one bits */ +template<typename Packet> EIGEN_DEVICE_FUNC inline Packet +ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;} + template <typename RealScalar> EIGEN_DEVICE_FUNC inline std::complex<RealScalar> ptrue(const std::complex<RealScalar>& /*a*/) { RealScalar b; @@ -319,9 +295,21 @@ EIGEN_DEVICE_FUNC inline std::complex<RealScalar> ptrue(const std::complex<RealS return std::complex<RealScalar>(b, b); } -/** \internal \returns the bitwise not of \a a */ -template <typename Packet> EIGEN_DEVICE_FUNC inline Packet -pnot(const Packet& a) { return pxor(ptrue(a), a);} +/** \internal \returns the bitwise and of \a a and \a b */ +template<typename Packet> EIGEN_DEVICE_FUNC inline Packet +pand(const Packet& a, const Packet& b) { return a & b; } + +/** \internal \returns the bitwise or of \a a and \a b */ +template<typename Packet> EIGEN_DEVICE_FUNC inline Packet +por(const Packet& a, const Packet& b) { return a | b; } + +/** \internal \returns the bitwise xor of \a a and \a b */ +template<typename Packet> EIGEN_DEVICE_FUNC inline Packet +pxor(const Packet& a, const Packet& b) { return a ^ b; } + +/** \internal \returns the bitwise and of \a a and not \a b */ +template<typename Packet> EIGEN_DEVICE_FUNC inline Packet +pandnot(const Packet& a, const Packet& b) { return pand(a, pxor(ptrue(b), b)); } /** \internal \returns a <= b as a bit mask */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet @@ -337,7 +325,7 @@ pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); } /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -pcmp_lt_or_nan(const Packet& a, const Packet& b) { return pnot(pcmp_le(b,a)); } +pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); } /** \internal \returns \a or \b for each field in packet according to \mask */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet @@ -355,7 +343,10 @@ template<> EIGEN_DEVICE_FUNC inline double pselect<double>( return numext::equal_strict(cond,0.) ? b : a; } - +template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>( + const bool& cond, const bool& a, const bool& b) { + return cond ? a : b; +} /** \internal \returns the min of \a a and \a b (coeff-wise) */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index c2d5205f2..23568cae9 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -42,8 +42,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits HasAbs2 = 0, HasMin = 0, HasMax = 0, - HasSetLinear = 0, - HasInsert = 1 + HasSetLinear = 0 }; }; #endif @@ -77,7 +76,6 @@ EIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) { } template<> EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(const Packet4cf& a) { return Packet4cf(ptrue(Packet8f(a.v))); } -template<> EIGEN_STRONG_INLINE Packet4cf pnot<Packet4cf>(const Packet4cf& a) { return Packet4cf(pnot(Packet8f(a.v))); } template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); } @@ -267,7 +265,6 @@ EIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) { } template<> EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(const Packet2cd& a) { return Packet2cd(ptrue(Packet4d(a.v))); } -template<> EIGEN_STRONG_INLINE Packet2cd pnot<Packet2cd>(const Packet2cd& a) { return Packet2cd(pnot(Packet4d(a.v))); } template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); } diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 35a329e3f..b27a62cbc 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -62,7 +62,6 @@ template<> struct packet_traits<float> : default_packet_traits AlignedOnScalar = 1, size = 8, HasHalfPacket = 1, - HasInsert = 1, HasDiv = 1, HasSin = EIGEN_FAST_MATH, @@ -93,7 +92,6 @@ template<> struct packet_traits<double> : default_packet_traits AlignedOnScalar = 1, size=4, HasHalfPacket = 1, - HasInsert = 1, HasDiv = 1, HasExp = 1, @@ -131,8 +129,7 @@ struct packet_traits<Eigen::half> : default_packet_traits { HasRsqrt = 0, HasExp = 0, HasLog = 0, - HasBlend = 0, - HasInsert = 1 + HasBlend = 0 }; }; #endif diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index dc2ae0a35..747f7a5d5 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -41,8 +41,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits HasAbs2 = 0, HasMin = 0, HasMax = 0, - HasSetLinear = 0, - HasInsert = 1 + HasSetLinear = 0 }; }; @@ -59,7 +58,6 @@ template<> struct unpacket_traits<Packet8cf> { }; template<> EIGEN_STRONG_INLINE Packet8cf ptrue<Packet8cf>(const Packet8cf& a) { return Packet8cf(ptrue(Packet16f(a.v))); } -template<> EIGEN_STRONG_INLINE Packet8cf pnot<Packet8cf>(const Packet8cf& a) { return Packet8cf(pnot(Packet16f(a.v))); } template<> EIGEN_STRONG_INLINE Packet8cf padd<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_add_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet8cf psub<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_sub_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet8cf pnegate(const Packet8cf& a) @@ -266,7 +264,6 @@ template<> EIGEN_STRONG_INLINE Packet4cd pmul<Packet4cd>(const Packet4cd& a, con } template<> EIGEN_STRONG_INLINE Packet4cd ptrue<Packet4cd>(const Packet4cd& a) { return Packet4cd(ptrue(Packet8d(a.v))); } -template<> EIGEN_STRONG_INLINE Packet4cd pnot<Packet4cd>(const Packet4cd& a) { return Packet4cd(pnot(Packet8d(a.v))); } template<> EIGEN_STRONG_INLINE Packet4cd pand <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pand(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet4cd por <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(por(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet4cd pxor <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pxor(a.v,b.v)); } diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 10a1d4adb..ad37ad620 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -73,8 +73,7 @@ struct packet_traits<half> : default_packet_traits { HasRsqrt = 0, HasExp = 0, HasLog = 0, - HasBlend = 0, - HasInsert = 1 + HasBlend = 0 }; }; @@ -88,7 +87,6 @@ template<> struct packet_traits<float> : default_packet_traits size = 16, HasHalfPacket = 1, HasBlend = 0, - HasInsert = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, #if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT) @@ -117,7 +115,6 @@ template<> struct packet_traits<double> : default_packet_traits AlignedOnScalar = 1, size = 8, HasHalfPacket = 1, - HasInsert = 1, #if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT) HasSqrt = EIGEN_FAST_MATH, HasRsqrt = EIGEN_FAST_MATH, @@ -1323,11 +1320,6 @@ template<> EIGEN_STRONG_INLINE Packet16h ptrue(const Packet16h& a) { return ptrue(Packet8i(a)); } -template<> EIGEN_STRONG_INLINE Packet16h pnot(const Packet16h& a) { - return _mm256_xor_si256(a, ptrue(a)); -} - - template<> EIGEN_STRONG_INLINE Packet16h por(const Packet16h& a,const Packet16h& b) { // in some cases Packet8i is a wrapper around __m256i, so we need to // cast to Packet8i to call the correct overload. diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index ee5a938b9..e11af1dca 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -136,7 +136,6 @@ struct packet_traits<float> : default_packet_traits HasConj = 1, HasSetLinear = 0, HasBlend = 0, - HasInsert = 1, HasDiv = 1, HasFloor = 1, @@ -177,8 +176,7 @@ struct packet_traits<int8_t> : default_packet_traits HasMax = 1, HasConj = 1, HasSetLinear = 0, - HasBlend = 0, - HasInsert = 1, + HasBlend = 0 }; }; @@ -209,7 +207,6 @@ struct packet_traits<uint8_t> : default_packet_traits HasConj = 1, HasSetLinear = 0, HasBlend = 0, - HasInsert = 1, HasSqrt = 1 }; @@ -241,8 +238,7 @@ struct packet_traits<int16_t> : default_packet_traits HasMax = 1, HasConj = 1, HasSetLinear = 0, - HasBlend = 0, - HasInsert = 1, + HasBlend = 0 }; }; @@ -273,8 +269,6 @@ struct packet_traits<uint16_t> : default_packet_traits HasConj = 1, HasSetLinear = 0, HasBlend = 0, - HasInsert = 1, - HasSqrt = 1 }; }; @@ -305,8 +299,7 @@ struct packet_traits<int32_t> : default_packet_traits HasMax = 1, HasConj = 1, HasSetLinear = 0, - HasBlend = 0, - HasInsert = 1, + HasBlend = 0 }; }; @@ -337,7 +330,6 @@ struct packet_traits<uint32_t> : default_packet_traits HasConj = 1, HasSetLinear = 0, HasBlend = 0, - HasInsert = 1, HasSqrt = 1 }; @@ -370,8 +362,7 @@ struct packet_traits<int64_t> : default_packet_traits HasMax = 1, HasConj = 1, HasSetLinear = 0, - HasBlend = 0, - HasInsert = 1, + HasBlend = 0 }; }; @@ -402,8 +393,7 @@ struct packet_traits<uint64_t> : default_packet_traits HasMax = 1, HasConj = 1, HasSetLinear = 0, - HasBlend = 0, - HasInsert = 1, + HasBlend = 0 }; }; @@ -1498,42 +1488,6 @@ template<> EIGEN_STRONG_INLINE Packet2l pandnot<Packet2l>(const Packet2l& a, con template<> EIGEN_STRONG_INLINE Packet2ul pandnot<Packet2ul>(const Packet2ul& a, const Packet2ul& b) { return vbicq_u64(a,b); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2f pnot<Packet2f>(const Packet2f& a) -{ return vreinterpret_f32_u32(vmvn_u32(vreinterpret_u32_f32(a))); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4f pnot<Packet4f>(const Packet4f& a) -{ return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(a))); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4c pnot<Packet4c>(const Packet4c& a) -{ return ~a; } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8c pnot<Packet8c>(const Packet8c& a) -{ return vmvn_s8(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16c pnot<Packet16c>(const Packet16c& a) -{ return vmvnq_s8(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4uc pnot<Packet4uc>(const Packet4uc& a) -{ return ~a; } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8uc pnot<Packet8uc>(const Packet8uc& a) -{ return vmvn_u8(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16uc pnot<Packet16uc>(const Packet16uc& a) -{ return vmvnq_u8(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4s pnot<Packet4s>(const Packet4s& a) -{ return vmvn_s16(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8s pnot<Packet8s>(const Packet8s& a) -{ return vmvnq_s16(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4us pnot<Packet4us>(const Packet4us& a) -{ return vmvn_u16(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8us pnot<Packet8us>(const Packet8us& a) -{ return vmvnq_u16(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2i pnot<Packet2i>(const Packet2i& a) -{ return vmvn_s32(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4i pnot<Packet4i>(const Packet4i& a) -{ return vmvnq_s32(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ui pnot<Packet2ui>(const Packet2ui& a) -{ return vmvn_u32(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4ui pnot<Packet4ui>(const Packet4ui& a) -{ return vmvnq_u32(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2l pnot<Packet2l>(const Packet2l& a) -{ return vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a))); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ul pnot<Packet2ul>(const Packet2ul& a) -{ return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(a))); } template<int N> EIGEN_STRONG_INLINE Packet4c parithmetic_shift_right(Packet4c& a) { return vget_lane_s32(vreinterpret_s32_s8(vshr_n_s8(vreinterpret_s8_s32(vdup_n_s32(a)), N)), 0); } @@ -3218,7 +3172,6 @@ template<> struct packet_traits<double> : default_packet_traits HasConj = 1, HasSetLinear = 0, HasBlend = 0, - HasInsert = 1, HasDiv = 1, HasFloor = 1, diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 8bf8bfe85..0d322a2a1 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -45,8 +45,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits HasMin = 0, HasMax = 0, HasSetLinear = 0, - HasBlend = 1, - HasInsert = 1 + HasBlend = 1 }; }; #endif @@ -84,7 +83,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con } template<> EIGEN_STRONG_INLINE Packet2cf ptrue <Packet2cf>(const Packet2cf& a) { return Packet2cf(ptrue(Packet4f(a.v))); } -template<> EIGEN_STRONG_INLINE Packet2cf pnot <Packet2cf>(const Packet2cf& a) { return Packet2cf(pnot(Packet4f(a.v))); } template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); } @@ -292,7 +290,6 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con } template<> EIGEN_STRONG_INLINE Packet1cd ptrue <Packet1cd>(const Packet1cd& a) { return Packet1cd(ptrue(Packet2d(a.v))); } -template<> EIGEN_STRONG_INLINE Packet1cd pnot <Packet1cd>(const Packet1cd& a) { return Packet1cd(pnot(Packet2d(a.v))); } template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 645aee0cd..9f2e922a2 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -108,7 +108,6 @@ struct packet_traits<float> : default_packet_traits { HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasBlend = 1, - HasInsert = 1, HasFloor = 1 #ifdef EIGEN_VECTORIZE_SSE4_1 @@ -133,8 +132,7 @@ struct packet_traits<double> : default_packet_traits { HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1, - HasInsert = 1 + HasBlend = 1 #ifdef EIGEN_VECTORIZE_SSE4_1 , @@ -171,10 +169,10 @@ template<> struct packet_traits<bool> : default_packet_traits size=16, HasAdd = 1, - HasSub = 0, + HasSub = 1, HasShift = 0, HasMul = 1, - HasNegate = 0, + HasNegate = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -254,6 +252,7 @@ template<> EIGEN_STRONG_INLINE Packet16b padd<Packet16b>(const Packet16b& a, con template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); } +template<> EIGEN_STRONG_INLINE Packet16b psub<Packet16b>(const Packet16b& a, const Packet16b& b) { return _mm_xor_si128(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { @@ -270,6 +269,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a); } +template<> EIGEN_STRONG_INLINE Packet16b pnegate(const Packet16b& a) +{ + return psub(pset1<Packet16b>(false), a); +} + template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } @@ -305,11 +309,29 @@ template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& #endif #ifdef EIGEN_VECTORIZE_SSE4_1 -template<> EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) { return _mm_blendv_ps(b,a,mask); } +template<> EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) { + return _mm_blendv_ps(b,a,mask); +} + +template<> EIGEN_DEVICE_FUNC inline Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b) { + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b),_mm_castsi128_ps(a),_mm_castsi128_ps(mask))); +} template<> EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) { return _mm_blendv_pd(b,a,mask); } + +template<> EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) { + return _mm_blendv_epi8(b,a,mask); +} +#else +template<> EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) { + Packet16b a_part = _mm_and_si128(mask, a); + Packet16b b_part = _mm_andnot_si128(mask, b); + return _mm_or_si128(a_part, b_part); +} #endif + + template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 // There appears to be a bug in GCC, by which the optimizer may @@ -567,6 +589,23 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) return vec4i_swizzle1(tmp, 0, 0, 1, 1); } +// Loads 8 bools from memory and returns the packet +// {b0, b0, b1, b1, b2, b2, b3, b3, b4, b4, b5, b5, b6, b6, b7, b7} +template<> EIGEN_STRONG_INLINE Packet16b ploaddup<Packet16b>(const bool* from) +{ + __m128i tmp = _mm_castpd_si128(pload1<Packet2d>(reinterpret_cast<const double*>(from))); + return _mm_unpacklo_epi8(tmp, tmp); +} + +// Loads 4 bools from memory and returns the packet +// {b0, b0 b0, b0, b1, b1, b1, b1, b2, b2, b2, b2, b3, b3, b3, b3} +template<> EIGEN_STRONG_INLINE Packet16b +ploadquad<Packet16b>(const bool* from) { + __m128i tmp = _mm_castps_si128(pload1<Packet4f>(reinterpret_cast<const float*>(from))); + tmp = _mm_unpacklo_epi8(tmp, tmp); + return _mm_unpacklo_epi16(tmp, tmp); +} + template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); } template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); } template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); } @@ -588,6 +627,14 @@ template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const dou template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) { return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); +} + +template<> EIGEN_DEVICE_FUNC inline Packet16b pgather<bool, Packet16b>(const bool* from, Index stride) +{ + return _mm_set_epi8(from[15*stride], from[14*stride], from[13*stride], from[12*stride], + from[11*stride], from[10*stride], from[9*stride], from[8*stride], + from[7*stride], from[6*stride], from[5*stride], from[4*stride], + from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) @@ -609,6 +656,14 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2)); to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3)); } +template<> EIGEN_DEVICE_FUNC inline void pscatter<bool, Packet16b>(bool* to, const Packet16b& from, Index stride) +{ + to[4*stride*0] = _mm_cvtsi128_si32(from); + to[4*stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1)); + to[4*stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2)); + to[4*stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3)); +} + // some compilers might be tempted to perform multiple moves instead of using a vector path. template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a) @@ -653,12 +708,19 @@ template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { retu template<> EIGEN_STRONG_INLINE bool pfirst<Packet16b>(const Packet16b& a) { int x = _mm_cvtsi128_si32(a); return static_cast<bool>(x & 1); } #endif -template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) -{ return _mm_shuffle_ps(a,a,0x1B); } -template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) -{ return _mm_shuffle_pd(a,a,0x1); } -template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) -{ return _mm_shuffle_epi32(a,0x1B); } +template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return _mm_shuffle_ps(a,a,0x1B); } +template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return _mm_shuffle_pd(a,a,0x1); } +template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return _mm_shuffle_epi32(a,0x1B); } +template<> EIGEN_STRONG_INLINE Packet16b preverse(const Packet16b& a) { +#ifdef EIGEN_VECTORIZE_SSSE3 + __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + return _mm_shuffle_epi8(a, mask); +#else + Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)); + tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1)); + return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8)); +#endif +} template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { @@ -777,7 +839,7 @@ template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) #endif template<> EIGEN_STRONG_INLINE bool predux<Packet16b>(const Packet16b& a) { -Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a,a)); + Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a,a)); return (pfirst(tmp) != 0) || (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) != 0); } @@ -804,6 +866,12 @@ template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) return (aux[0] * aux[1]) * (aux[2] * aux[3]); } +template<> EIGEN_STRONG_INLINE bool predux_mul<Packet16b>(const Packet16b& a) { + Packet4i tmp = _mm_and_si128(a, _mm_unpackhi_epi64(a,a)); + return ((pfirst<Packet4i>(tmp) == 0x01010101) && + (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) == 0x01010101)); +} + // min template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) { @@ -904,6 +972,87 @@ ptranspose(PacketBlock<Packet16b,4>& kernel) { kernel.packet[3] = _mm_unpackhi_epi16(T1, T3); } +EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock<Packet16b,16>& kernel) { + // If we number the elements in the input thus: + // kernel.packet[ 0] = {00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f} + // kernel.packet[ 1] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f} + // ... + // kernel.packet[15] = {f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff}, + // + // the desired output is: + // kernel.packet[ 0] = {00, 10, 20, 30, 40, 50, 60, 70, 80, 90, a0, b0, c0, d0, e0, f0} + // kernel.packet[ 1] = {01, 11, 21, 31, 41, 51, 61, 71, 81, 91, a1, b1, c1, d1, e1, f1} + // ... + // kernel.packet[15] = {0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, af, bf, cf, df, ef, ff}, + __m128i t0 = _mm_unpacklo_epi8(kernel.packet[0], kernel.packet[1]); // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 + __m128i t1 = _mm_unpackhi_epi8(kernel.packet[0], kernel.packet[1]); // 08 18 09 19 0a 1a 0b 1b 0c 1c 0d 1d 0e 1e 0f 1f + __m128i t2 = _mm_unpacklo_epi8(kernel.packet[2], kernel.packet[3]); // 20 30 21 31 22 32 ... 27 37 + __m128i t3 = _mm_unpackhi_epi8(kernel.packet[2], kernel.packet[3]); // 28 38 29 39 2a 3a ... 2f 3f + __m128i t4 = _mm_unpacklo_epi8(kernel.packet[4], kernel.packet[5]); // 40 50 41 51 42 52 47 57 + __m128i t5 = _mm_unpackhi_epi8(kernel.packet[4], kernel.packet[5]); // 48 58 49 59 4a 5a + __m128i t6 = _mm_unpacklo_epi8(kernel.packet[6], kernel.packet[7]); + __m128i t7 = _mm_unpackhi_epi8(kernel.packet[6], kernel.packet[7]); + __m128i t8 = _mm_unpacklo_epi8(kernel.packet[8], kernel.packet[9]); + __m128i t9 = _mm_unpackhi_epi8(kernel.packet[8], kernel.packet[9]); + __m128i ta = _mm_unpacklo_epi8(kernel.packet[10], kernel.packet[11]); + __m128i tb = _mm_unpackhi_epi8(kernel.packet[10], kernel.packet[11]); + __m128i tc = _mm_unpacklo_epi8(kernel.packet[12], kernel.packet[13]); + __m128i td = _mm_unpackhi_epi8(kernel.packet[12], kernel.packet[13]); + __m128i te = _mm_unpacklo_epi8(kernel.packet[14], kernel.packet[15]); + __m128i tf = _mm_unpackhi_epi8(kernel.packet[14], kernel.packet[15]); + + __m128i s0 = _mm_unpacklo_epi16(t0, t2); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + __m128i s1 = _mm_unpackhi_epi16(t0, t2); // 04 14 24 34 + __m128i s2 = _mm_unpacklo_epi16(t1, t3); // 08 18 28 38 ... + __m128i s3 = _mm_unpackhi_epi16(t1, t3); // 0c 1c 2c 3c ... + __m128i s4 = _mm_unpacklo_epi16(t4, t6); // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 + __m128i s5 = _mm_unpackhi_epi16(t4, t6); // 44 54 64 74 ... + __m128i s6 = _mm_unpacklo_epi16(t5, t7); + __m128i s7 = _mm_unpackhi_epi16(t5, t7); + __m128i s8 = _mm_unpacklo_epi16(t8, ta); + __m128i s9 = _mm_unpackhi_epi16(t8, ta); + __m128i sa = _mm_unpacklo_epi16(t9, tb); + __m128i sb = _mm_unpackhi_epi16(t9, tb); + __m128i sc = _mm_unpacklo_epi16(tc, te); + __m128i sd = _mm_unpackhi_epi16(tc, te); + __m128i se = _mm_unpacklo_epi16(td, tf); + __m128i sf = _mm_unpackhi_epi16(td, tf); + + __m128i u0 = _mm_unpacklo_epi32(s0, s4); // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 + __m128i u1 = _mm_unpackhi_epi32(s0, s4); // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 + __m128i u2 = _mm_unpacklo_epi32(s1, s5); + __m128i u3 = _mm_unpackhi_epi32(s1, s5); + __m128i u4 = _mm_unpacklo_epi32(s2, s6); + __m128i u5 = _mm_unpackhi_epi32(s2, s6); + __m128i u6 = _mm_unpacklo_epi32(s3, s7); + __m128i u7 = _mm_unpackhi_epi32(s3, s7); + __m128i u8 = _mm_unpacklo_epi32(s8, sc); + __m128i u9 = _mm_unpackhi_epi32(s8, sc); + __m128i ua = _mm_unpacklo_epi32(s9, sd); + __m128i ub = _mm_unpackhi_epi32(s9, sd); + __m128i uc = _mm_unpacklo_epi32(sa, se); + __m128i ud = _mm_unpackhi_epi32(sa, se); + __m128i ue = _mm_unpacklo_epi32(sb, sf); + __m128i uf = _mm_unpackhi_epi32(sb, sf); + + kernel.packet[0] = _mm_unpacklo_epi64(u0, u8); + kernel.packet[1] = _mm_unpackhi_epi64(u0, u8); + kernel.packet[2] = _mm_unpacklo_epi64(u1, u9); + kernel.packet[3] = _mm_unpackhi_epi64(u1, u9); + kernel.packet[4] = _mm_unpacklo_epi64(u2, ua); + kernel.packet[5] = _mm_unpackhi_epi64(u2, ua); + kernel.packet[6] = _mm_unpacklo_epi64(u3, ub); + kernel.packet[7] = _mm_unpackhi_epi64(u3, ub); + kernel.packet[8] = _mm_unpacklo_epi64(u4, uc); + kernel.packet[9] = _mm_unpackhi_epi64(u4, uc); + kernel.packet[10] = _mm_unpacklo_epi64(u5, ud); + kernel.packet[11] = _mm_unpackhi_epi64(u5, ud); + kernel.packet[12] = _mm_unpacklo_epi64(u6, ue); + kernel.packet[13] = _mm_unpackhi_epi64(u6, ue); + kernel.packet[14] = _mm_unpacklo_epi64(u7, uf); + kernel.packet[15] = _mm_unpackhi_epi64(u7, uf); +} template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) { const __m128i zero = _mm_setzero_si128(); diff --git a/test/packetmath.cpp b/test/packetmath.cpp index c7732e6e6..e59e9df21 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -10,11 +10,25 @@ #include "packetmath_test_shared.h" -#define REF_ADD(a,b) ((a)+(b)) -#define REF_SUB(a,b) ((a)-(b)) -#define REF_MUL(a,b) ((a)*(b)) -#define REF_DIV(a,b) ((a)/(b)) -#define REF_ABS_DIFF(a,b) ((a)>(b)?(a)-(b):(b)-(a)) +template <typename T> +inline T REF_ADD(const T& a, const T& b) { return a + b;} +template <typename T> +inline T REF_SUB(const T& a, const T& b) { return a - b;} +template <typename T> +inline T REF_MUL(const T& a, const T& b) { return a * b;} +template <typename T> +inline T REF_DIV(const T& a, const T& b) { return a / b;} +template <typename T> +inline T REF_ABS_DIFF(const T& a, const T& b) { return a>b ? a - b : b-a;} + +// Specializations for bool +template <> +inline bool REF_ADD(const bool& a, const bool& b) { return a || b;} +template <> +inline bool REF_SUB(const bool& a, const bool& b) { return a ^ b;} +template <> +inline bool REF_MUL(const bool& a, const bool& b) { return a && b;} + template<typename FromScalar, typename FromPacket, typename ToScalar, typename ToPacket, bool CanCast = false> struct test_cast_helper; @@ -70,7 +84,8 @@ void test_cast() { test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, CanCast>::run(); } -template<typename Scalar,typename Packet> void packetmath_boolean() +template<typename Scalar,typename Packet> +void packetmath_boolean_mask_ops() { const int PacketSize = internal::unpacket_traits<Packet>::size; const int size = 2*PacketSize; @@ -82,9 +97,18 @@ template<typename Scalar,typename Packet> void packetmath_boolean() { data1[i] = internal::random<Scalar>(); } - CHECK_CWISE2_IF(true, internal::por, internal::por); - CHECK_CWISE2_IF(true, internal::pxor, internal::pxor); - CHECK_CWISE2_IF(true, internal::pand, internal::pand); + CHECK_CWISE1(internal::ptrue, internal::ptrue); + CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot); + for (int i = 0; i < PacketSize; ++i) { + data1[i] = Scalar(i); + data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0); + } + CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq); +} + +template<> +void packetmath_boolean_mask_ops<bool, internal::Packet16b>() +{ } template<typename Scalar,typename Packet> void packetmath() @@ -171,9 +195,6 @@ template<typename Scalar,typename Packet> void packetmath() CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul); CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv); - CHECK_CWISE1(internal::pnot, internal::pnot); - CHECK_CWISE1(internal::pzero, internal::pzero); - CHECK_CWISE1(internal::ptrue, internal::ptrue); if (PacketTraits::HasNegate) CHECK_CWISE1(internal::negate, internal::pnegate); CHECK_CWISE1(numext::conj, internal::pconj); @@ -252,7 +273,7 @@ template<typename Scalar,typename Packet> void packetmath() ref[0] = Scalar(1); for (int i=0; i<PacketSize; ++i) - ref[0] *= data1[i]; + ref[0] = REF_MUL(ref[0], data1[i]); VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul"); for (int i=0; i<PacketSize; ++i) @@ -272,6 +293,7 @@ template<typename Scalar,typename Packet> void packetmath() } } + if (PacketTraits::HasBlend) { Packet thenPacket = internal::pload<Packet>(data1); Packet elsePacket = internal::pload<Packet>(data2); @@ -304,26 +326,22 @@ template<typename Scalar,typename Packet> void packetmath() CHECK_CWISE3_IF(true, internal::pselect, internal::pselect); } - { - for (int i = 0; i < PacketSize; ++i) { - data1[i] = Scalar(i); - data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0); - } - CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq); - } - CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt); for (int i=0; i<size; ++i) { data1[i] = internal::random<Scalar>(); } - CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot); + CHECK_CWISE1(internal::pzero, internal::pzero); + CHECK_CWISE2_IF(true, internal::por, internal::por); + CHECK_CWISE2_IF(true, internal::pxor, internal::pxor); + CHECK_CWISE2_IF(true, internal::pand, internal::pand); - packetmath_boolean<Scalar, Packet>(); + packetmath_boolean_mask_ops<Scalar, Packet>(); } + template<typename Scalar,typename Packet> void packetmath_real() { typedef internal::packet_traits<Scalar> PacketTraits; @@ -753,7 +771,7 @@ EIGEN_DECLARE_TEST(packetmath) CALL_SUBTEST_12( test::runner<std::complex<double> >::run() ); CALL_SUBTEST_13(( packetmath<half,internal::packet_traits<half>::type>() )); #ifdef EIGEN_PACKET_MATH_SSE_H - CALL_SUBTEST_14(( packetmath_boolean<bool,internal::packet_traits<bool>::type>() )); + CALL_SUBTEST_14(( packetmath<bool,internal::packet_traits<bool>::type>() )); #endif g_first_pass = false; } |