aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/GenericPacketMath.h67
-rw-r--r--Eigen/src/Core/arch/AVX/Complex.h5
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h5
-rw-r--r--Eigen/src/Core/arch/AVX512/Complex.h5
-rw-r--r--Eigen/src/Core/arch/AVX512/PacketMath.h10
-rw-r--r--Eigen/src/Core/arch/NEON/PacketMath.h57
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h5
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h175
-rw-r--r--test/packetmath.cpp66
9 files changed, 243 insertions, 152 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 449793372..d25b45ab0 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -58,7 +58,6 @@ struct default_packet_traits
HasConj = 1,
HasSetLinear = 1,
HasBlend = 0,
- HasInsert = 0,
HasDiv = 0,
HasSqrt = 0,
@@ -191,8 +190,10 @@ psub(const Packet& a, const Packet& b) { return a-b; }
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pnegate(const Packet& a) { return -a; }
-/** \internal \returns conj(a) (coeff-wise) */
+template<> EIGEN_DEVICE_FUNC inline bool
+pnegate(const bool& a) { return !a; }
+/** \internal \returns conj(a) (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pconj(const Packet& a) { return numext::conj(a); }
@@ -269,38 +270,9 @@ pldexp(const Packet &a, const Packet &exponent) {
return ldexp(a, static_cast<int>(exponent));
}
-// Notice: The following ops accept and operator on bitwise masks.
-// The value of each field in a masks is Scalar(0) or ~Scalar(0).
-// For boolean packet like Packet16b, this is different from the
-// representation of true and false, which are 1 and 0.
-// As an example
-// ptrue<Packet16b>() = 0xffffffffffffffffffffffffffffffff
-// while
-// pset1<Packet16b>(true) = 0x01010101010101010101010101010101
-
-/** \internal \returns the bitwise and of \a a and \a b */
+/** \internal \returns zero bits */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pand(const Packet& a, const Packet& b) { return a & b; }
-
-/** \internal \returns the bitwise or of \a a and \a b */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-por(const Packet& a, const Packet& b) { return a | b; }
-
-/** \internal \returns the bitwise xor of \a a and \a b */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pxor(const Packet& a, const Packet& b) { return a ^ b; }
-
-/** \internal \returns the bitwise and of \a a and not \a b */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pandnot(const Packet& a, const Packet& b) { return a & (~b); }
-
-/** \internal \returns ones */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;}
-
-/** \internal \returns zeros */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pzero(const Packet& a) { return pxor(a,a); }
+pzero(const Packet& /*a*/) { Packet b; memset((void*)&b, 0, sizeof(b)); return b;}
template<> EIGEN_DEVICE_FUNC inline float pzero<float>(const float& a) {
EIGEN_UNUSED_VARIABLE(a);
@@ -312,6 +284,10 @@ template<> EIGEN_DEVICE_FUNC inline double pzero<double>(const double& a) {
return 0.;
}
+/** \internal \returns one bits */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;}
+
template <typename RealScalar>
EIGEN_DEVICE_FUNC inline std::complex<RealScalar> ptrue(const std::complex<RealScalar>& /*a*/) {
RealScalar b;
@@ -319,9 +295,21 @@ EIGEN_DEVICE_FUNC inline std::complex<RealScalar> ptrue(const std::complex<RealS
return std::complex<RealScalar>(b, b);
}
-/** \internal \returns the bitwise not of \a a */
-template <typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pnot(const Packet& a) { return pxor(ptrue(a), a);}
+/** \internal \returns the bitwise and of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pand(const Packet& a, const Packet& b) { return a & b; }
+
+/** \internal \returns the bitwise or of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+por(const Packet& a, const Packet& b) { return a | b; }
+
+/** \internal \returns the bitwise xor of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pxor(const Packet& a, const Packet& b) { return a ^ b; }
+
+/** \internal \returns the bitwise and of \a a and not \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pandnot(const Packet& a, const Packet& b) { return pand(a, pxor(ptrue(b), b)); }
/** \internal \returns a <= b as a bit mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
@@ -337,7 +325,7 @@ pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pcmp_lt_or_nan(const Packet& a, const Packet& b) { return pnot(pcmp_le(b,a)); }
+pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
/** \internal \returns \a or \b for each field in packet according to \mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
@@ -355,7 +343,10 @@ template<> EIGEN_DEVICE_FUNC inline double pselect<double>(
return numext::equal_strict(cond,0.) ? b : a;
}
-
+template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
+ const bool& cond, const bool& a, const bool& b) {
+ return cond ? a : b;
+}
/** \internal \returns the min of \a a and \a b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index c2d5205f2..23568cae9 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -42,8 +42,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
- HasSetLinear = 0,
- HasInsert = 1
+ HasSetLinear = 0
};
};
#endif
@@ -77,7 +76,6 @@ EIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) {
}
template<> EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(const Packet4cf& a) { return Packet4cf(ptrue(Packet8f(a.v))); }
-template<> EIGEN_STRONG_INLINE Packet4cf pnot<Packet4cf>(const Packet4cf& a) { return Packet4cf(pnot(Packet8f(a.v))); }
template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
@@ -267,7 +265,6 @@ EIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) {
}
template<> EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(const Packet2cd& a) { return Packet2cd(ptrue(Packet4d(a.v))); }
-template<> EIGEN_STRONG_INLINE Packet2cd pnot<Packet2cd>(const Packet2cd& a) { return Packet2cd(pnot(Packet4d(a.v))); }
template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 35a329e3f..b27a62cbc 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -62,7 +62,6 @@ template<> struct packet_traits<float> : default_packet_traits
AlignedOnScalar = 1,
size = 8,
HasHalfPacket = 1,
- HasInsert = 1,
HasDiv = 1,
HasSin = EIGEN_FAST_MATH,
@@ -93,7 +92,6 @@ template<> struct packet_traits<double> : default_packet_traits
AlignedOnScalar = 1,
size=4,
HasHalfPacket = 1,
- HasInsert = 1,
HasDiv = 1,
HasExp = 1,
@@ -131,8 +129,7 @@ struct packet_traits<Eigen::half> : default_packet_traits {
HasRsqrt = 0,
HasExp = 0,
HasLog = 0,
- HasBlend = 0,
- HasInsert = 1
+ HasBlend = 0
};
};
#endif
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index dc2ae0a35..747f7a5d5 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -41,8 +41,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
- HasSetLinear = 0,
- HasInsert = 1
+ HasSetLinear = 0
};
};
@@ -59,7 +58,6 @@ template<> struct unpacket_traits<Packet8cf> {
};
template<> EIGEN_STRONG_INLINE Packet8cf ptrue<Packet8cf>(const Packet8cf& a) { return Packet8cf(ptrue(Packet16f(a.v))); }
-template<> EIGEN_STRONG_INLINE Packet8cf pnot<Packet8cf>(const Packet8cf& a) { return Packet8cf(pnot(Packet16f(a.v))); }
template<> EIGEN_STRONG_INLINE Packet8cf padd<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet8cf psub<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_sub_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet8cf pnegate(const Packet8cf& a)
@@ -266,7 +264,6 @@ template<> EIGEN_STRONG_INLINE Packet4cd pmul<Packet4cd>(const Packet4cd& a, con
}
template<> EIGEN_STRONG_INLINE Packet4cd ptrue<Packet4cd>(const Packet4cd& a) { return Packet4cd(ptrue(Packet8d(a.v))); }
-template<> EIGEN_STRONG_INLINE Packet4cd pnot<Packet4cd>(const Packet4cd& a) { return Packet4cd(pnot(Packet8d(a.v))); }
template<> EIGEN_STRONG_INLINE Packet4cd pand <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pand(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cd por <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(por(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cd pxor <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pxor(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 10a1d4adb..ad37ad620 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -73,8 +73,7 @@ struct packet_traits<half> : default_packet_traits {
HasRsqrt = 0,
HasExp = 0,
HasLog = 0,
- HasBlend = 0,
- HasInsert = 1
+ HasBlend = 0
};
};
@@ -88,7 +87,6 @@ template<> struct packet_traits<float> : default_packet_traits
size = 16,
HasHalfPacket = 1,
HasBlend = 0,
- HasInsert = 1,
HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH,
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
@@ -117,7 +115,6 @@ template<> struct packet_traits<double> : default_packet_traits
AlignedOnScalar = 1,
size = 8,
HasHalfPacket = 1,
- HasInsert = 1,
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
HasSqrt = EIGEN_FAST_MATH,
HasRsqrt = EIGEN_FAST_MATH,
@@ -1323,11 +1320,6 @@ template<> EIGEN_STRONG_INLINE Packet16h ptrue(const Packet16h& a) {
return ptrue(Packet8i(a));
}
-template<> EIGEN_STRONG_INLINE Packet16h pnot(const Packet16h& a) {
- return _mm256_xor_si256(a, ptrue(a));
-}
-
-
template<> EIGEN_STRONG_INLINE Packet16h por(const Packet16h& a,const Packet16h& b) {
// in some cases Packet8i is a wrapper around __m256i, so we need to
// cast to Packet8i to call the correct overload.
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index ee5a938b9..e11af1dca 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -136,7 +136,6 @@ struct packet_traits<float> : default_packet_traits
HasConj = 1,
HasSetLinear = 0,
HasBlend = 0,
- HasInsert = 1,
HasDiv = 1,
HasFloor = 1,
@@ -177,8 +176,7 @@ struct packet_traits<int8_t> : default_packet_traits
HasMax = 1,
HasConj = 1,
HasSetLinear = 0,
- HasBlend = 0,
- HasInsert = 1,
+ HasBlend = 0
};
};
@@ -209,7 +207,6 @@ struct packet_traits<uint8_t> : default_packet_traits
HasConj = 1,
HasSetLinear = 0,
HasBlend = 0,
- HasInsert = 1,
HasSqrt = 1
};
@@ -241,8 +238,7 @@ struct packet_traits<int16_t> : default_packet_traits
HasMax = 1,
HasConj = 1,
HasSetLinear = 0,
- HasBlend = 0,
- HasInsert = 1,
+ HasBlend = 0
};
};
@@ -273,8 +269,6 @@ struct packet_traits<uint16_t> : default_packet_traits
HasConj = 1,
HasSetLinear = 0,
HasBlend = 0,
- HasInsert = 1,
-
HasSqrt = 1
};
};
@@ -305,8 +299,7 @@ struct packet_traits<int32_t> : default_packet_traits
HasMax = 1,
HasConj = 1,
HasSetLinear = 0,
- HasBlend = 0,
- HasInsert = 1,
+ HasBlend = 0
};
};
@@ -337,7 +330,6 @@ struct packet_traits<uint32_t> : default_packet_traits
HasConj = 1,
HasSetLinear = 0,
HasBlend = 0,
- HasInsert = 1,
HasSqrt = 1
};
@@ -370,8 +362,7 @@ struct packet_traits<int64_t> : default_packet_traits
HasMax = 1,
HasConj = 1,
HasSetLinear = 0,
- HasBlend = 0,
- HasInsert = 1,
+ HasBlend = 0
};
};
@@ -402,8 +393,7 @@ struct packet_traits<uint64_t> : default_packet_traits
HasMax = 1,
HasConj = 1,
HasSetLinear = 0,
- HasBlend = 0,
- HasInsert = 1,
+ HasBlend = 0
};
};
@@ -1498,42 +1488,6 @@ template<> EIGEN_STRONG_INLINE Packet2l pandnot<Packet2l>(const Packet2l& a, con
template<> EIGEN_STRONG_INLINE Packet2ul pandnot<Packet2ul>(const Packet2ul& a, const Packet2ul& b)
{ return vbicq_u64(a,b); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2f pnot<Packet2f>(const Packet2f& a)
-{ return vreinterpret_f32_u32(vmvn_u32(vreinterpret_u32_f32(a))); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4f pnot<Packet4f>(const Packet4f& a)
-{ return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(a))); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4c pnot<Packet4c>(const Packet4c& a)
-{ return ~a; }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8c pnot<Packet8c>(const Packet8c& a)
-{ return vmvn_s8(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16c pnot<Packet16c>(const Packet16c& a)
-{ return vmvnq_s8(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4uc pnot<Packet4uc>(const Packet4uc& a)
-{ return ~a; }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8uc pnot<Packet8uc>(const Packet8uc& a)
-{ return vmvn_u8(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16uc pnot<Packet16uc>(const Packet16uc& a)
-{ return vmvnq_u8(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4s pnot<Packet4s>(const Packet4s& a)
-{ return vmvn_s16(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8s pnot<Packet8s>(const Packet8s& a)
-{ return vmvnq_s16(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4us pnot<Packet4us>(const Packet4us& a)
-{ return vmvn_u16(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8us pnot<Packet8us>(const Packet8us& a)
-{ return vmvnq_u16(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2i pnot<Packet2i>(const Packet2i& a)
-{ return vmvn_s32(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4i pnot<Packet4i>(const Packet4i& a)
-{ return vmvnq_s32(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ui pnot<Packet2ui>(const Packet2ui& a)
-{ return vmvn_u32(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4ui pnot<Packet4ui>(const Packet4ui& a)
-{ return vmvnq_u32(a); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2l pnot<Packet2l>(const Packet2l& a)
-{ return vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a))); }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ul pnot<Packet2ul>(const Packet2ul& a)
-{ return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(a))); }
template<int N> EIGEN_STRONG_INLINE Packet4c parithmetic_shift_right(Packet4c& a)
{ return vget_lane_s32(vreinterpret_s32_s8(vshr_n_s8(vreinterpret_s8_s32(vdup_n_s32(a)), N)), 0); }
@@ -3218,7 +3172,6 @@ template<> struct packet_traits<double> : default_packet_traits
HasConj = 1,
HasSetLinear = 0,
HasBlend = 0,
- HasInsert = 1,
HasDiv = 1,
HasFloor = 1,
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 8bf8bfe85..0d322a2a1 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -45,8 +45,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
HasMin = 0,
HasMax = 0,
HasSetLinear = 0,
- HasBlend = 1,
- HasInsert = 1
+ HasBlend = 1
};
};
#endif
@@ -84,7 +83,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
}
template<> EIGEN_STRONG_INLINE Packet2cf ptrue <Packet2cf>(const Packet2cf& a) { return Packet2cf(ptrue(Packet4f(a.v))); }
-template<> EIGEN_STRONG_INLINE Packet2cf pnot <Packet2cf>(const Packet2cf& a) { return Packet2cf(pnot(Packet4f(a.v))); }
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
@@ -292,7 +290,6 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
}
template<> EIGEN_STRONG_INLINE Packet1cd ptrue <Packet1cd>(const Packet1cd& a) { return Packet1cd(ptrue(Packet2d(a.v))); }
-template<> EIGEN_STRONG_INLINE Packet1cd pnot <Packet1cd>(const Packet1cd& a) { return Packet1cd(pnot(Packet2d(a.v))); }
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 645aee0cd..9f2e922a2 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -108,7 +108,6 @@ struct packet_traits<float> : default_packet_traits {
HasTanh = EIGEN_FAST_MATH,
HasErf = EIGEN_FAST_MATH,
HasBlend = 1,
- HasInsert = 1,
HasFloor = 1
#ifdef EIGEN_VECTORIZE_SSE4_1
@@ -133,8 +132,7 @@ struct packet_traits<double> : default_packet_traits {
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
- HasBlend = 1,
- HasInsert = 1
+ HasBlend = 1
#ifdef EIGEN_VECTORIZE_SSE4_1
,
@@ -171,10 +169,10 @@ template<> struct packet_traits<bool> : default_packet_traits
size=16,
HasAdd = 1,
- HasSub = 0,
+ HasSub = 1,
HasShift = 0,
HasMul = 1,
- HasNegate = 0,
+ HasNegate = 1,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
@@ -254,6 +252,7 @@ template<> EIGEN_STRONG_INLINE Packet16b padd<Packet16b>(const Packet16b& a, con
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet16b psub<Packet16b>(const Packet16b& a, const Packet16b& b) { return _mm_xor_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
{
@@ -270,6 +269,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
}
+template<> EIGEN_STRONG_INLINE Packet16b pnegate(const Packet16b& a)
+{
+ return psub(pset1<Packet16b>(false), a);
+}
+
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
@@ -305,11 +309,29 @@ template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d&
#endif
#ifdef EIGEN_VECTORIZE_SSE4_1
-template<> EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) { return _mm_blendv_ps(b,a,mask); }
+template<> EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) {
+ return _mm_blendv_ps(b,a,mask);
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b) {
+ return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b),_mm_castsi128_ps(a),_mm_castsi128_ps(mask)));
+}
template<> EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) { return _mm_blendv_pd(b,a,mask); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) {
+ return _mm_blendv_epi8(b,a,mask);
+}
+#else
+template<> EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) {
+ Packet16b a_part = _mm_and_si128(mask, a);
+ Packet16b b_part = _mm_andnot_si128(mask, b);
+ return _mm_or_si128(a_part, b_part);
+}
#endif
+
+
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
// There appears to be a bug in GCC, by which the optimizer may
@@ -567,6 +589,23 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
return vec4i_swizzle1(tmp, 0, 0, 1, 1);
}
+// Loads 8 bools from memory and returns the packet
+// {b0, b0, b1, b1, b2, b2, b3, b3, b4, b4, b5, b5, b6, b6, b7, b7}
+template<> EIGEN_STRONG_INLINE Packet16b ploaddup<Packet16b>(const bool* from)
+{
+ __m128i tmp = _mm_castpd_si128(pload1<Packet2d>(reinterpret_cast<const double*>(from)));
+ return _mm_unpacklo_epi8(tmp, tmp);
+}
+
+// Loads 4 bools from memory and returns the packet
+// {b0, b0 b0, b0, b1, b1, b1, b1, b2, b2, b2, b2, b3, b3, b3, b3}
+template<> EIGEN_STRONG_INLINE Packet16b
+ploadquad<Packet16b>(const bool* from) {
+ __m128i tmp = _mm_castps_si128(pload1<Packet4f>(reinterpret_cast<const float*>(from)));
+ tmp = _mm_unpacklo_epi8(tmp, tmp);
+ return _mm_unpacklo_epi16(tmp, tmp);
+}
+
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
@@ -588,6 +627,14 @@ template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const dou
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
{
return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet16b pgather<bool, Packet16b>(const bool* from, Index stride)
+{
+ return _mm_set_epi8(from[15*stride], from[14*stride], from[13*stride], from[12*stride],
+ from[11*stride], from[10*stride], from[9*stride], from[8*stride],
+ from[7*stride], from[6*stride], from[5*stride], from[4*stride],
+ from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
@@ -609,6 +656,14 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const
to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<bool, Packet16b>(bool* to, const Packet16b& from, Index stride)
+{
+ to[4*stride*0] = _mm_cvtsi128_si32(from);
+ to[4*stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
+ to[4*stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
+ to[4*stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
+}
+
// some compilers might be tempted to perform multiple moves instead of using a vector path.
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
@@ -653,12 +708,19 @@ template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { retu
template<> EIGEN_STRONG_INLINE bool pfirst<Packet16b>(const Packet16b& a) { int x = _mm_cvtsi128_si32(a); return static_cast<bool>(x & 1); }
#endif
-template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
-{ return _mm_shuffle_ps(a,a,0x1B); }
-template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
-{ return _mm_shuffle_pd(a,a,0x1); }
-template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
-{ return _mm_shuffle_epi32(a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return _mm_shuffle_ps(a,a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return _mm_shuffle_pd(a,a,0x1); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return _mm_shuffle_epi32(a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet16b preverse(const Packet16b& a) {
+#ifdef EIGEN_VECTORIZE_SSSE3
+ __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ return _mm_shuffle_epi8(a, mask);
+#else
+ Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3));
+ tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
+ return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8));
+#endif
+}
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
{
@@ -777,7 +839,7 @@ template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
#endif
template<> EIGEN_STRONG_INLINE bool predux<Packet16b>(const Packet16b& a) {
-Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a,a));
+ Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a,a));
return (pfirst(tmp) != 0) || (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) != 0);
}
@@ -804,6 +866,12 @@ template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
return (aux[0] * aux[1]) * (aux[2] * aux[3]);
}
+template<> EIGEN_STRONG_INLINE bool predux_mul<Packet16b>(const Packet16b& a) {
+ Packet4i tmp = _mm_and_si128(a, _mm_unpackhi_epi64(a,a));
+ return ((pfirst<Packet4i>(tmp) == 0x01010101) &&
+ (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) == 0x01010101));
+}
+
// min
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
{
@@ -904,6 +972,87 @@ ptranspose(PacketBlock<Packet16b,4>& kernel) {
kernel.packet[3] = _mm_unpackhi_epi16(T1, T3);
}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet16b,16>& kernel) {
+ // If we number the elements in the input thus:
+ // kernel.packet[ 0] = {00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f}
+ // kernel.packet[ 1] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f}
+ // ...
+ // kernel.packet[15] = {f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff},
+ //
+ // the desired output is:
+ // kernel.packet[ 0] = {00, 10, 20, 30, 40, 50, 60, 70, 80, 90, a0, b0, c0, d0, e0, f0}
+ // kernel.packet[ 1] = {01, 11, 21, 31, 41, 51, 61, 71, 81, 91, a1, b1, c1, d1, e1, f1}
+ // ...
+ // kernel.packet[15] = {0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, af, bf, cf, df, ef, ff},
+ __m128i t0 = _mm_unpacklo_epi8(kernel.packet[0], kernel.packet[1]); // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
+ __m128i t1 = _mm_unpackhi_epi8(kernel.packet[0], kernel.packet[1]); // 08 18 09 19 0a 1a 0b 1b 0c 1c 0d 1d 0e 1e 0f 1f
+ __m128i t2 = _mm_unpacklo_epi8(kernel.packet[2], kernel.packet[3]); // 20 30 21 31 22 32 ... 27 37
+ __m128i t3 = _mm_unpackhi_epi8(kernel.packet[2], kernel.packet[3]); // 28 38 29 39 2a 3a ... 2f 3f
+ __m128i t4 = _mm_unpacklo_epi8(kernel.packet[4], kernel.packet[5]); // 40 50 41 51 42 52 47 57
+ __m128i t5 = _mm_unpackhi_epi8(kernel.packet[4], kernel.packet[5]); // 48 58 49 59 4a 5a
+ __m128i t6 = _mm_unpacklo_epi8(kernel.packet[6], kernel.packet[7]);
+ __m128i t7 = _mm_unpackhi_epi8(kernel.packet[6], kernel.packet[7]);
+ __m128i t8 = _mm_unpacklo_epi8(kernel.packet[8], kernel.packet[9]);
+ __m128i t9 = _mm_unpackhi_epi8(kernel.packet[8], kernel.packet[9]);
+ __m128i ta = _mm_unpacklo_epi8(kernel.packet[10], kernel.packet[11]);
+ __m128i tb = _mm_unpackhi_epi8(kernel.packet[10], kernel.packet[11]);
+ __m128i tc = _mm_unpacklo_epi8(kernel.packet[12], kernel.packet[13]);
+ __m128i td = _mm_unpackhi_epi8(kernel.packet[12], kernel.packet[13]);
+ __m128i te = _mm_unpacklo_epi8(kernel.packet[14], kernel.packet[15]);
+ __m128i tf = _mm_unpackhi_epi8(kernel.packet[14], kernel.packet[15]);
+
+ __m128i s0 = _mm_unpacklo_epi16(t0, t2); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
+ __m128i s1 = _mm_unpackhi_epi16(t0, t2); // 04 14 24 34
+ __m128i s2 = _mm_unpacklo_epi16(t1, t3); // 08 18 28 38 ...
+ __m128i s3 = _mm_unpackhi_epi16(t1, t3); // 0c 1c 2c 3c ...
+ __m128i s4 = _mm_unpacklo_epi16(t4, t6); // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
+ __m128i s5 = _mm_unpackhi_epi16(t4, t6); // 44 54 64 74 ...
+ __m128i s6 = _mm_unpacklo_epi16(t5, t7);
+ __m128i s7 = _mm_unpackhi_epi16(t5, t7);
+ __m128i s8 = _mm_unpacklo_epi16(t8, ta);
+ __m128i s9 = _mm_unpackhi_epi16(t8, ta);
+ __m128i sa = _mm_unpacklo_epi16(t9, tb);
+ __m128i sb = _mm_unpackhi_epi16(t9, tb);
+ __m128i sc = _mm_unpacklo_epi16(tc, te);
+ __m128i sd = _mm_unpackhi_epi16(tc, te);
+ __m128i se = _mm_unpacklo_epi16(td, tf);
+ __m128i sf = _mm_unpackhi_epi16(td, tf);
+
+ __m128i u0 = _mm_unpacklo_epi32(s0, s4); // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
+ __m128i u1 = _mm_unpackhi_epi32(s0, s4); // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
+ __m128i u2 = _mm_unpacklo_epi32(s1, s5);
+ __m128i u3 = _mm_unpackhi_epi32(s1, s5);
+ __m128i u4 = _mm_unpacklo_epi32(s2, s6);
+ __m128i u5 = _mm_unpackhi_epi32(s2, s6);
+ __m128i u6 = _mm_unpacklo_epi32(s3, s7);
+ __m128i u7 = _mm_unpackhi_epi32(s3, s7);
+ __m128i u8 = _mm_unpacklo_epi32(s8, sc);
+ __m128i u9 = _mm_unpackhi_epi32(s8, sc);
+ __m128i ua = _mm_unpacklo_epi32(s9, sd);
+ __m128i ub = _mm_unpackhi_epi32(s9, sd);
+ __m128i uc = _mm_unpacklo_epi32(sa, se);
+ __m128i ud = _mm_unpackhi_epi32(sa, se);
+ __m128i ue = _mm_unpacklo_epi32(sb, sf);
+ __m128i uf = _mm_unpackhi_epi32(sb, sf);
+
+ kernel.packet[0] = _mm_unpacklo_epi64(u0, u8);
+ kernel.packet[1] = _mm_unpackhi_epi64(u0, u8);
+ kernel.packet[2] = _mm_unpacklo_epi64(u1, u9);
+ kernel.packet[3] = _mm_unpackhi_epi64(u1, u9);
+ kernel.packet[4] = _mm_unpacklo_epi64(u2, ua);
+ kernel.packet[5] = _mm_unpackhi_epi64(u2, ua);
+ kernel.packet[6] = _mm_unpacklo_epi64(u3, ub);
+ kernel.packet[7] = _mm_unpackhi_epi64(u3, ub);
+ kernel.packet[8] = _mm_unpacklo_epi64(u4, uc);
+ kernel.packet[9] = _mm_unpackhi_epi64(u4, uc);
+ kernel.packet[10] = _mm_unpacklo_epi64(u5, ud);
+ kernel.packet[11] = _mm_unpackhi_epi64(u5, ud);
+ kernel.packet[12] = _mm_unpacklo_epi64(u6, ue);
+ kernel.packet[13] = _mm_unpackhi_epi64(u6, ue);
+ kernel.packet[14] = _mm_unpacklo_epi64(u7, uf);
+ kernel.packet[15] = _mm_unpackhi_epi64(u7, uf);
+}
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
const __m128i zero = _mm_setzero_si128();
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index c7732e6e6..e59e9df21 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -10,11 +10,25 @@
#include "packetmath_test_shared.h"
-#define REF_ADD(a,b) ((a)+(b))
-#define REF_SUB(a,b) ((a)-(b))
-#define REF_MUL(a,b) ((a)*(b))
-#define REF_DIV(a,b) ((a)/(b))
-#define REF_ABS_DIFF(a,b) ((a)>(b)?(a)-(b):(b)-(a))
+template <typename T>
+inline T REF_ADD(const T& a, const T& b) { return a + b;}
+template <typename T>
+inline T REF_SUB(const T& a, const T& b) { return a - b;}
+template <typename T>
+inline T REF_MUL(const T& a, const T& b) { return a * b;}
+template <typename T>
+inline T REF_DIV(const T& a, const T& b) { return a / b;}
+template <typename T>
+inline T REF_ABS_DIFF(const T& a, const T& b) { return a>b ? a - b : b-a;}
+
+// Specializations for bool
+template <>
+inline bool REF_ADD(const bool& a, const bool& b) { return a || b;}
+template <>
+inline bool REF_SUB(const bool& a, const bool& b) { return a ^ b;}
+template <>
+inline bool REF_MUL(const bool& a, const bool& b) { return a && b;}
+
template<typename FromScalar, typename FromPacket, typename ToScalar, typename ToPacket, bool CanCast = false>
struct test_cast_helper;
@@ -70,7 +84,8 @@ void test_cast() {
test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, CanCast>::run();
}
-template<typename Scalar,typename Packet> void packetmath_boolean()
+template<typename Scalar,typename Packet>
+void packetmath_boolean_mask_ops()
{
const int PacketSize = internal::unpacket_traits<Packet>::size;
const int size = 2*PacketSize;
@@ -82,9 +97,18 @@ template<typename Scalar,typename Packet> void packetmath_boolean()
{
data1[i] = internal::random<Scalar>();
}
- CHECK_CWISE2_IF(true, internal::por, internal::por);
- CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
- CHECK_CWISE2_IF(true, internal::pand, internal::pand);
+ CHECK_CWISE1(internal::ptrue, internal::ptrue);
+ CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
+ for (int i = 0; i < PacketSize; ++i) {
+ data1[i] = Scalar(i);
+ data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
+ }
+ CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
+}
+
+template<>
+void packetmath_boolean_mask_ops<bool, internal::Packet16b>()
+{
}
template<typename Scalar,typename Packet> void packetmath()
@@ -171,9 +195,6 @@ template<typename Scalar,typename Packet> void packetmath()
CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
- CHECK_CWISE1(internal::pnot, internal::pnot);
- CHECK_CWISE1(internal::pzero, internal::pzero);
- CHECK_CWISE1(internal::ptrue, internal::ptrue);
if (PacketTraits::HasNegate)
CHECK_CWISE1(internal::negate, internal::pnegate);
CHECK_CWISE1(numext::conj, internal::pconj);
@@ -252,7 +273,7 @@ template<typename Scalar,typename Packet> void packetmath()
ref[0] = Scalar(1);
for (int i=0; i<PacketSize; ++i)
- ref[0] *= data1[i];
+ ref[0] = REF_MUL(ref[0], data1[i]);
VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");
for (int i=0; i<PacketSize; ++i)
@@ -272,6 +293,7 @@ template<typename Scalar,typename Packet> void packetmath()
}
}
+
if (PacketTraits::HasBlend) {
Packet thenPacket = internal::pload<Packet>(data1);
Packet elsePacket = internal::pload<Packet>(data2);
@@ -304,26 +326,22 @@ template<typename Scalar,typename Packet> void packetmath()
CHECK_CWISE3_IF(true, internal::pselect, internal::pselect);
}
- {
- for (int i = 0; i < PacketSize; ++i) {
- data1[i] = Scalar(i);
- data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
- }
- CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
- }
-
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
for (int i=0; i<size; ++i)
{
data1[i] = internal::random<Scalar>();
}
- CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
+ CHECK_CWISE1(internal::pzero, internal::pzero);
+ CHECK_CWISE2_IF(true, internal::por, internal::por);
+ CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
+ CHECK_CWISE2_IF(true, internal::pand, internal::pand);
- packetmath_boolean<Scalar, Packet>();
+ packetmath_boolean_mask_ops<Scalar, Packet>();
}
+
template<typename Scalar,typename Packet> void packetmath_real()
{
typedef internal::packet_traits<Scalar> PacketTraits;
@@ -753,7 +771,7 @@ EIGEN_DECLARE_TEST(packetmath)
CALL_SUBTEST_12( test::runner<std::complex<double> >::run() );
CALL_SUBTEST_13(( packetmath<half,internal::packet_traits<half>::type>() ));
#ifdef EIGEN_PACKET_MATH_SSE_H
- CALL_SUBTEST_14(( packetmath_boolean<bool,internal::packet_traits<bool>::type>() ));
+ CALL_SUBTEST_14(( packetmath<bool,internal::packet_traits<bool>::type>() ));
#endif
g_first_pass = false;
}