about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Eugene Zhulenev <ezhulenev@google.com> 2019-05-02 14:52:58 -0700
committer: Eugene Zhulenev <ezhulenev@google.com> 2019-05-02 14:52:58 -0700
commit: e9f0eb8a5e6c5b17d85cff6ea45fe9aec35a0248 (patch)
tree: 6aa1ec005ad5b2314023bde6d21dcfce3a641b1e
parent: 96e30e936a32fdb44ec519403031a56e5fc501fd (diff)
Add masked_store_available to unpacket_traits
-rw-r--r-- Eigen/src/Core/GenericPacketMath.h 2
-rw-r--r-- Eigen/src/Core/arch/AVX/Complex.h 4
-rw-r--r-- Eigen/src/Core/arch/AVX/PacketMath.h 6
-rw-r--r-- Eigen/src/Core/arch/AVX512/Complex.h 6
-rw-r--r-- Eigen/src/Core/arch/AVX512/PacketMath.h 6
-rw-r--r-- Eigen/src/Core/arch/AltiVec/Complex.h 4
-rwxr-xr-x Eigen/src/Core/arch/AltiVec/PacketMath.h 6
-rw-r--r-- Eigen/src/Core/arch/GPU/PacketMath.h 4
-rw-r--r-- Eigen/src/Core/arch/GPU/PacketMathHalf.h 13
-rw-r--r-- Eigen/src/Core/arch/MSA/Complex.h 4
-rw-r--r-- Eigen/src/Core/arch/MSA/PacketMath.h 6
-rw-r--r-- Eigen/src/Core/arch/NEON/Complex.h 4
-rw-r--r-- Eigen/src/Core/arch/NEON/PacketMath.h 6
-rw-r--r-- Eigen/src/Core/arch/SSE/Complex.h 4
-rwxr-xr-x Eigen/src/Core/arch/SSE/PacketMath.h 6
-rw-r--r-- Eigen/src/Core/arch/SYCL/InteropHeaders.h 2
-rw-r--r-- Eigen/src/Core/arch/ZVector/Complex.h 4
-rwxr-xr-x Eigen/src/Core/arch/ZVector/PacketMath.h 6
-rw-r--r-- Eigen/src/Core/util/XprHelper.h 3
-rw-r--r-- test/packetmath.cpp 6
20 files changed, 53 insertions, 49 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index f1a8d5707..6ab38994f 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -377,7 +377,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
*/
template<typename Scalar, typename Packet>
EIGEN_DEVICE_FUNC inline
-typename enable_if<unpacket_traits<Packet>::masked_load_available, void>::type
+typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type
pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 3d229cd81..f97efd471 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -47,7 +47,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
#endif
-template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
@@ -263,7 +263,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
#endif
-template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 5011b98ea..7ee9dee10 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -118,14 +118,14 @@ template<> struct unpacket_traits<Packet8f> {
typedef Packet4f half;
typedef Packet8i integer_packet;
typedef uint8_t mask_t;
- enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true};
+ enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=true};
};
template<> struct unpacket_traits<Packet4d> {
typedef double type;
typedef Packet2d half;
- enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false};
+ enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false};
};
-template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false, masked_load_available=false}; };
+template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false, masked_load_available=false, masked_store_available=false}; };
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index 5ab2ffe77..9ee7a284e 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -52,7 +52,8 @@ template<> struct unpacket_traits<Packet8cf> {
size = 8,
alignment=unpacket_traits<Packet16f>::alignment,
vectorizable=true,
- masked_load_available=false
+ masked_load_available=false,
+ masked_store_available=false
};
typedef Packet4cf half;
};
@@ -249,7 +250,8 @@ template<> struct unpacket_traits<Packet4cd> {
size = 4,
alignment = unpacket_traits<Packet8d>::alignment,
vectorizable=true,
- masked_load_available=false
+ masked_load_available=false,
+ masked_store_available=false
};
typedef Packet2cd half;
};
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index c822f9352..64619ecd9 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -103,19 +103,19 @@ struct unpacket_traits<Packet16f> {
typedef Packet8f half;
typedef Packet16i integer_packet;
typedef uint16_t mask_t;
- enum { size = 16, alignment=Aligned64, vectorizable=true, masked_load_available=true };
+ enum { size = 16, alignment=Aligned64, vectorizable=true, masked_load_available=true, masked_store_available=true };
};
template <>
struct unpacket_traits<Packet8d> {
typedef double type;
typedef Packet4d half;
- enum { size = 8, alignment=Aligned64, vectorizable=true, masked_load_available=false };
+ enum { size = 8, alignment=Aligned64, vectorizable=true, masked_load_available=false, masked_store_available=false };
};
template <>
struct unpacket_traits<Packet16i> {
typedef int type;
typedef Packet8i half;
- enum { size = 16, alignment=Aligned64, vectorizable=false, masked_load_available=false };
+ enum { size = 16, alignment=Aligned64, vectorizable=false, masked_load_available=false, masked_store_available=false };
};
template <>
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index ebc3b2aeb..62df67ac9 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -60,7 +60,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -291,7 +291,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index b5484e6aa..4b770d036 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -192,13 +192,13 @@ template<> struct unpacket_traits<Packet4f>
typedef float type;
typedef Packet4f half;
typedef Packet4i integer_packet;
- enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false};
+ enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};
};
template<> struct unpacket_traits<Packet4i>
{
typedef int type;
typedef Packet4i half;
- enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false};
+ enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false, masked_store_available=false};
};
inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
@@ -921,7 +921,7 @@ template<> struct packet_traits<double> : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2d half; };
inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
{
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index 7fac0a5e1..ee5b4b39e 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -92,8 +92,8 @@ template<> struct packet_traits<double> : default_packet_traits
};
-template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef float4 half; };
-template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef double2 half; };
+template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef float4 half; };
+template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef double2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
return make_float4(from, from, from, from);
diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
index 7fae5995a..de91db4c3 100644
--- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
@@ -42,7 +42,7 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
};
};
-template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef half2 half; };
+template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef half2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
return __half2half2(from);
@@ -567,7 +567,7 @@ struct packet_traits<half> : default_packet_traits {
};
-template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; typedef uint16_t mask_t; enum {size=16, alignment=Aligned32, vectorizable=true, masked_load_available=true}; typedef Packet16h half; };
+template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; typedef uint16_t mask_t; enum {size=16, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=false}; typedef Packet16h half; };
template<> EIGEN_STRONG_INLINE Packet16h pset1<Packet16h>(const Eigen::half& from) {
Packet16h result;
@@ -611,11 +611,6 @@ template<> EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet1
_mm256_storeu_si256((__m256i*)(void*)to, from.x);
}
-template<> EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet16h& from, int16_t umask) {
- __mmask16 mask = static_cast<__mmask16>(umask);
- _mm512_mask_storeu_epi16((__m256i*)(void*)to, mask, from.x);
-}
-
template<> EIGEN_STRONG_INLINE Packet16h
ploaddup<Packet16h>(const Eigen::half* from) {
Packet16h result;
@@ -1069,7 +1064,7 @@ struct packet_traits<Eigen::half> : default_packet_traits {
};
-template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet8h half; };
+template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet8h half; };
template<> EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const Eigen::half& from) {
Packet8h result;
@@ -1432,7 +1427,7 @@ struct packet_traits<Eigen::half> : default_packet_traits {
};
-template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4h half; };
+template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h half; };
template<> EIGEN_STRONG_INLINE Packet4h pset1<Packet4h>(const Eigen::half& from) {
Packet4h result;
diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h
index 0ced061c5..c09e1e4fb 100644
--- a/Eigen/src/Core/arch/MSA/Complex.h
+++ b/Eigen/src/Core/arch/MSA/Complex.h
@@ -127,7 +127,7 @@ struct packet_traits<std::complex<float> > : default_packet_traits {
template <>
struct unpacket_traits<Packet2cf> {
typedef std::complex<float> type;
- enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false };
+ enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
typedef Packet2cf half;
};
@@ -500,7 +500,7 @@ struct packet_traits<std::complex<double> > : default_packet_traits {
template <>
struct unpacket_traits<Packet1cd> {
typedef std::complex<double> type;
- enum { size = 1, alignment = Aligned16, vectorizable=true, masked_load_available=false };
+ enum { size = 1, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
typedef Packet1cd half;
};
diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h
index f426d5b83..3c922d4de 100644
--- a/Eigen/src/Core/arch/MSA/PacketMath.h
+++ b/Eigen/src/Core/arch/MSA/PacketMath.h
@@ -117,14 +117,14 @@ struct packet_traits<int32_t> : default_packet_traits {
template <>
struct unpacket_traits<Packet4f> {
typedef float type;
- enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false };
+ enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
typedef Packet4f half;
};
template <>
struct unpacket_traits<Packet4i> {
typedef int32_t type;
- enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false };
+ enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
typedef Packet4i half;
};
@@ -925,7 +925,7 @@ struct packet_traits<double> : default_packet_traits {
template <>
struct unpacket_traits<Packet2d> {
typedef double type;
- enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false };
+ enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
typedef Packet2d half;
};
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index c17d0a03e..5a1ed386b 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -62,7 +62,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -340,7 +340,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index b8051cfbf..84153ece1 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -145,13 +145,13 @@ template<> struct unpacket_traits<Packet4f>
typedef float type;
typedef Packet4f half;
typedef Packet4i integer_packet;
- enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false};
+ enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};
};
template<> struct unpacket_traits<Packet4i>
{
typedef int32_t type;
typedef Packet4i half;
- enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false};
+ enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};
};
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
@@ -657,7 +657,7 @@ template<> struct packet_traits<double> : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2d half; };
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 7d89c323b..b3b1b4854 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -50,7 +50,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
#endif
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
@@ -283,7 +283,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
#endif
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index a4e8be92d..b466d6462 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -166,17 +166,17 @@ template<> struct unpacket_traits<Packet4f> {
typedef float type;
typedef Packet4f half;
typedef Packet4i integer_packet;
- enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false};
+ enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};
};
template<> struct unpacket_traits<Packet2d> {
typedef double type;
typedef Packet2d half;
- enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false};
+ enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};
};
template<> struct unpacket_traits<Packet4i> {
typedef int type;
typedef Packet4i half;
- enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false};
+ enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false, masked_store_available=false};
};
#ifndef EIGEN_VECTORIZE_AVX
diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
index 1afa63bb2..b09d45ea1 100644
--- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h
+++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
@@ -88,7 +88,7 @@ SYCL_ARITHMETIC(cl::sycl::cl_double2)
#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\
template<> struct unpacket_traits<packet_type> {\
typedef unpacket_type type;\
- enum {size=lengths, alignment=Aligned16, vectorizable=true, masked_load_available=false};\
+ enum {size=lengths, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\
typedef packet_type half;\
};
SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4)
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index 9fcbcb82d..8939619f5 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -91,8 +91,8 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; };
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; };
/* Forward declaration */
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
index 74e0a1313..b1c788168 100755
--- a/Eigen/src/Core/arch/ZVector/PacketMath.h
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -239,9 +239,9 @@ template<> struct packet_traits<double> : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4i half; };
-template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4f half; };
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4i half; };
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2d half; };
/* Forward declaration */
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index ce019946b..bbad66d33 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -186,7 +186,8 @@ template<typename T> struct unpacket_traits
size = 1,
alignment = 1,
vectorizable = false,
- masked_load_available=false
+ masked_load_available=false,
+ masked_store_available=false
};
};
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index e704a53ea..d018aaeb0 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -228,6 +228,12 @@ template<typename Scalar,typename Packet> void packetmath()
VERIFY(areApprox(data3, data2, PacketSize) && "internal::ploadu masked");
}
}
+ }
+
+ if (internal::unpacket_traits<Packet>::masked_store_available)
+ {
+ packet_helper<internal::unpacket_traits<Packet>::masked_store_available, Packet> h;
+ unsigned long long max_umask = (0x1ull << PacketSize);
for (int offset=0; offset<PacketSize; ++offset)
{