diff options
author | Eugene Zhulenev <ezhulenev@google.com> | 2019-05-02 14:52:58 -0700 |
---|---|---|
committer | Eugene Zhulenev <ezhulenev@google.com> | 2019-05-02 14:52:58 -0700 |
commit | e9f0eb8a5e6c5b17d85cff6ea45fe9aec35a0248 (patch) | |
tree | 6aa1ec005ad5b2314023bde6d21dcfce3a641b1e | |
parent | 96e30e936a32fdb44ec519403031a56e5fc501fd (diff) |
Add masked_store_available to unpacket_traits
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/Complex.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/Complex.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/Complex.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/GPU/PacketMath.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/GPU/PacketMathHalf.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/arch/MSA/Complex.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/MSA/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/Complex.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SYCL/InteropHeaders.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/arch/ZVector/Complex.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/ZVector/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/util/XprHelper.h | 3 | ||||
-rw-r--r-- | test/packetmath.cpp | 6 |
20 files changed, 53 insertions, 49 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index f1a8d5707..6ab38994f 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -377,7 +377,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu */ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline -typename enable_if<unpacket_traits<Packet>::masked_load_available, void>::type +typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask); template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index 3d229cd81..f97efd471 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -47,7 +47,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits }; #endif -template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; }; +template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); } @@ -263,7 +263,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits }; #endif -template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; }; +template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); } diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 5011b98ea..7ee9dee10 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -118,14 +118,14 @@ template<> struct unpacket_traits<Packet8f> { typedef Packet4f half; typedef Packet8i integer_packet; typedef uint8_t mask_t; - enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true}; + enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=true}; }; template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; - enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false}; + enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false, masked_store_available=false}; }; -template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false, masked_load_available=false}; }; +template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false, masked_load_available=false, masked_store_available=false}; }; template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); } template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); } diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index 5ab2ffe77..9ee7a284e 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -52,7 +52,8 @@ template<> struct unpacket_traits<Packet8cf> { size = 8, alignment=unpacket_traits<Packet16f>::alignment, vectorizable=true, - masked_load_available=false + masked_load_available=false, + masked_store_available=false }; typedef Packet4cf half; }; @@ -249,7 +250,8 @@ template<> struct unpacket_traits<Packet4cd> { size = 4, alignment = unpacket_traits<Packet8d>::alignment, vectorizable=true, - masked_load_available=false + masked_load_available=false, + masked_store_available=false }; typedef Packet2cd half; }; diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index c822f9352..64619ecd9 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -103,19 +103,19 @@ struct unpacket_traits<Packet16f> { typedef Packet8f half; typedef Packet16i integer_packet; typedef uint16_t mask_t; - enum { size = 16, alignment=Aligned64, vectorizable=true, masked_load_available=true }; + enum { size = 16, alignment=Aligned64, vectorizable=true, masked_load_available=true, masked_store_available=true }; }; template <> struct unpacket_traits<Packet8d> { typedef double type; typedef Packet4d half; - enum { size = 8, alignment=Aligned64, vectorizable=true, masked_load_available=false }; + enum { size = 8, alignment=Aligned64, vectorizable=true, masked_load_available=false, masked_store_available=false }; }; template <> struct unpacket_traits<Packet16i> { typedef int type; typedef Packet8i half; - enum { size = 16, alignment=Aligned64, vectorizable=false, masked_load_available=false }; + enum { size = 16, alignment=Aligned64, vectorizable=false, masked_load_available=false, masked_store_available=false }; }; template <> diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index ebc3b2aeb..62df67ac9 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -60,7 +60,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; }; +template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) { @@ -291,7 +291,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; }; +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); } diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index b5484e6aa..4b770d036 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -192,13 +192,13 @@ template<> struct unpacket_traits<Packet4f> typedef float type; typedef Packet4f half; typedef Packet4i integer_packet; - enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; + enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; }; template<> struct unpacket_traits<Packet4i> { typedef int type; typedef Packet4i half; - enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false}; + enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false, masked_store_available=false}; }; inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v) @@ -921,7 +921,7 @@ template<> struct packet_traits<double> : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; }; +template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2d half; }; inline std::ostream & operator <<(std::ostream & s, const Packet2l & v) { diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h index 7fac0a5e1..ee5b4b39e 100644 --- a/Eigen/src/Core/arch/GPU/PacketMath.h +++ b/Eigen/src/Core/arch/GPU/PacketMath.h @@ -92,8 +92,8 @@ template<> struct packet_traits<double> : default_packet_traits }; -template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef float4 half; }; -template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef double2 half; }; +template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef float4 half; }; +template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef double2 half; }; template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) { return make_float4(from, from, from, from); diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h index 7fae5995a..de91db4c3 100644 --- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h +++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h @@ -42,7 +42,7 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits }; }; -template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef half2 half; }; +template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef half2 half; }; template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) { return __half2half2(from); @@ -567,7 +567,7 @@ struct packet_traits<half> : default_packet_traits { }; -template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; typedef uint16_t mask_t; enum {size=16, alignment=Aligned32, vectorizable=true, masked_load_available=true}; typedef Packet16h half; }; +template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; typedef uint16_t mask_t; enum {size=16, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=false}; typedef Packet16h half; }; template<> EIGEN_STRONG_INLINE Packet16h pset1<Packet16h>(const Eigen::half& from) { Packet16h result; @@ -611,11 +611,6 @@ template<> EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet1 _mm256_storeu_si256((__m256i*)(void*)to, from.x); } -template<> EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet16h& from, int16_t umask) { - __mmask16 mask = static_cast<__mmask16>(umask); - _mm512_mask_storeu_epi16((__m256i*)(void*)to, mask, from.x); -} - template<> EIGEN_STRONG_INLINE Packet16h ploaddup<Packet16h>(const Eigen::half* from) { Packet16h result; @@ -1069,7 +1064,7 @@ struct packet_traits<Eigen::half> : default_packet_traits { }; -template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet8h half; }; +template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet8h half; }; template<> EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const Eigen::half& from) { Packet8h result; @@ -1432,7 +1427,7 @@ struct packet_traits<Eigen::half> : default_packet_traits { }; -template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4h half; }; +template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h half; }; template<> EIGEN_STRONG_INLINE Packet4h pset1<Packet4h>(const Eigen::half& from) { Packet4h result; diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h index 0ced061c5..c09e1e4fb 100644 --- a/Eigen/src/Core/arch/MSA/Complex.h +++ b/Eigen/src/Core/arch/MSA/Complex.h @@ -127,7 +127,7 @@ struct packet_traits<std::complex<float> > : default_packet_traits { template <> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; - enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false }; + enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false }; typedef Packet2cf half; }; @@ -500,7 +500,7 @@ struct packet_traits<std::complex<double> > : default_packet_traits { template <> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; - enum { size = 1, alignment = Aligned16, vectorizable=true, masked_load_available=false }; + enum { size = 1, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false }; typedef Packet1cd half; }; diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h index f426d5b83..3c922d4de 100644 --- a/Eigen/src/Core/arch/MSA/PacketMath.h +++ b/Eigen/src/Core/arch/MSA/PacketMath.h @@ -117,14 +117,14 @@ struct packet_traits<int32_t> : default_packet_traits { template <> struct unpacket_traits<Packet4f> { typedef float type; - enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false }; + enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false }; typedef Packet4f half; }; template <> struct unpacket_traits<Packet4i> { typedef int32_t type; - enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false }; + enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false }; typedef Packet4i half; }; @@ -925,7 +925,7 @@ struct packet_traits<double> : default_packet_traits { template <> struct unpacket_traits<Packet2d> { typedef double type; - enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false }; + enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false }; typedef Packet2d half; }; diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index c17d0a03e..5a1ed386b 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -62,7 +62,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; }; +template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) { @@ -340,7 +340,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; }; +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index b8051cfbf..84153ece1 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -145,13 +145,13 @@ template<> struct unpacket_traits<Packet4f> typedef float type; typedef Packet4f half; typedef Packet4i integer_packet; - enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; + enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; }; template<> struct unpacket_traits<Packet4i> { typedef int32_t type; typedef Packet4i half; - enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; + enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; }; template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); } @@ -657,7 +657,7 @@ template<> struct packet_traits<double> : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; }; +template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2d half; }; template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); } diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 7d89c323b..b3b1b4854 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -50,7 +50,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits }; #endif -template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; }; +template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); } @@ -283,7 +283,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits }; #endif -template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; }; +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index a4e8be92d..b466d6462 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -166,17 +166,17 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; typedef Packet4f half; typedef Packet4i integer_packet; - enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; + enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; }; template<> struct unpacket_traits<Packet2d> { typedef double type; typedef Packet2d half; - enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; + enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; }; template<> struct unpacket_traits<Packet4i> { typedef int type; typedef Packet4i half; - enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false}; + enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false, masked_store_available=false}; }; #ifndef EIGEN_VECTORIZE_AVX diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h index 1afa63bb2..b09d45ea1 100644 --- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h +++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h @@ -88,7 +88,7 @@ SYCL_ARITHMETIC(cl::sycl::cl_double2) #define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\ template<> struct unpacket_traits<packet_type> {\ typedef unpacket_type type;\ - enum {size=lengths, alignment=Aligned16, vectorizable=true, masked_load_available=false};\ + enum {size=lengths, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\ typedef packet_type half;\ }; SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4) diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h index 9fcbcb82d..8939619f5 100644 --- a/Eigen/src/Core/arch/ZVector/Complex.h +++ b/Eigen/src/Core/arch/ZVector/Complex.h @@ -91,8 +91,8 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; }; -template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; }; +template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2cf half; }; +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet1cd half; }; /* Forward declaration */ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel); diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h index 74e0a1313..b1c788168 100755 --- a/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -239,9 +239,9 @@ template<> struct packet_traits<double> : default_packet_traits }; }; -template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4i half; }; -template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4f half; }; -template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; }; +template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4i half; }; +template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4f half; }; +template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2d half; }; /* Forward declaration */ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel); diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index ce019946b..bbad66d33 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -186,7 +186,8 @@ template<typename T> struct unpacket_traits size = 1, alignment = 1, vectorizable = false, - masked_load_available=false + masked_load_available=false, + masked_store_available=false }; }; diff --git a/test/packetmath.cpp b/test/packetmath.cpp index e704a53ea..d018aaeb0 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -228,6 +228,12 @@ template<typename Scalar,typename Packet> void packetmath() VERIFY(areApprox(data3, data2, PacketSize) && "internal::ploadu masked"); } } + } + + if (internal::unpacket_traits<Packet>::masked_store_available) + { + packet_helper<internal::unpacket_traits<Packet>::masked_store_available, Packet> h; + unsigned long long max_umask = (0x1ull << PacketSize); for (int offset=0; offset<PacketSize; ++offset) { |