diff options
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/Complex.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 15 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/Complex.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/PacketMath.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/Complex.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/GPU/PacketMath.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/GPU/PacketMathHalf.h | 16 | ||||
-rw-r--r-- | Eigen/src/Core/arch/MSA/Complex.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/MSA/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/Complex.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SYCL/InteropHeaders.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/arch/ZVector/Complex.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/ZVector/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/util/XprHelper.h | 3 |
19 files changed, 78 insertions, 43 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 04a321b9f..3bba02278 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -287,6 +287,14 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; } template<typename Packet> EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; } +/** \internal \returns a packet version of \a *from, (un-aligned masked load) + * There is no generic implementation. We only have implementations for specialized + * cases. Generic case should not be called. + */ +template<typename Packet> EIGEN_DEVICE_FUNC inline +typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type +ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask); + /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::type& a) { return a; } diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index 5b8ff59bd..3d229cd81 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -47,7 +47,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits }; #endif -template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32, vectorizable=true}; typedef Packet2cf half; }; +template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); } @@ -263,7 +263,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits }; #endif -template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32, vectorizable=true}; typedef Packet1cd half; }; +template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); } diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index f88e36024..3f94f85eb 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -117,14 +117,15 @@ template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; typedef Packet8i integer_packet; - enum {size=8, alignment=Aligned32, vectorizable=true}; + typedef uint8_t mask_t; + enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true}; }; template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; - enum {size=4, alignment=Aligned32, vectorizable=true}; + enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false}; }; -template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false}; }; +template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false, masked_load_available=false}; }; template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); } template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); } @@ -385,6 +386,14 @@ template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EI template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); } template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); } +template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from, uint8_t umask) { + __m256i mask = _mm256_set1_epi8(static_cast<char>(umask)); + const __m256i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe); + mask = _mm256_or_si256(mask, bit_mask); + mask = _mm256_cmpeq_epi32(mask, _mm256_set1_epi32(0xffffffff)); + EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_maskload_ps(from, mask); +} + // Loads 4 floats from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, a3} template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from) { diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index 9a89dd01f..5ab2ffe77 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -51,7 +51,8 @@ template<> struct unpacket_traits<Packet8cf> { enum { size = 8, alignment=unpacket_traits<Packet16f>::alignment, - vectorizable=true + vectorizable=true, + masked_load_available=false }; typedef Packet4cf half; }; @@ -247,7 +248,8 @@ template<> struct unpacket_traits<Packet4cd> { enum { size = 4, alignment = unpacket_traits<Packet8d>::alignment, - vectorizable=true + vectorizable=true, + masked_load_available=false }; typedef Packet2cd half; }; diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 60b723b08..094309eda 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -102,19 +102,20 @@ struct unpacket_traits<Packet16f> { typedef float type; typedef Packet8f half; typedef Packet16i integer_packet; - enum { size = 16, alignment=Aligned64, vectorizable=true }; + typedef uint16_t mask_t; + enum { size = 16, alignment=Aligned64, vectorizable=true, masked_load_available=true }; }; template <> struct unpacket_traits<Packet8d> { typedef double type; typedef Packet4d half; - enum { size = 8, alignment=Aligned64, vectorizable=true }; + enum { size = 8, alignment=Aligned64, vectorizable=true, masked_load_available=false }; }; template <> struct unpacket_traits<Packet16i> { typedef int type; typedef Packet8i half; - enum { size = 16, alignment=Aligned64, vectorizable=false }; + enum { size = 16, alignment=Aligned64, vectorizable=false, masked_load_available=false }; }; template <> @@ -485,6 +486,12 @@ EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) { reinterpret_cast<const __m512i*>(from)); } +template <> +EIGEN_STRONG_INLINE Packet16f ploadu<Packet16f>(const float* from, uint16_t umask) { + __mmask16 mask = static_cast<__mmask16>(umask); + EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_maskz_loadu_ps(mask, from); +} + // Loads 8 floats from memory a returns the packet // {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} template <> diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 0c9f8e05d..ebc3b2aeb 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -60,7 +60,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; }; +template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) { @@ -291,7 +291,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; }; +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); } diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 9535724eb..b5484e6aa 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -192,13 +192,13 @@ template<> struct unpacket_traits<Packet4f> typedef float type; typedef Packet4f half; typedef Packet4i integer_packet; - enum {size=4, alignment=Aligned16, vectorizable=true}; + enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; }; template<> struct unpacket_traits<Packet4i> { typedef int type; typedef Packet4i half; - enum {size=4, alignment=Aligned16, vectorizable=false}; + enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false}; }; inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v) @@ -921,7 +921,7 @@ template<> struct packet_traits<double> : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2d half; }; +template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; }; inline std::ostream & operator <<(std::ostream & s, const Packet2l & v) { diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h index cd4615a45..7fac0a5e1 100644 --- a/Eigen/src/Core/arch/GPU/PacketMath.h +++ b/Eigen/src/Core/arch/GPU/PacketMath.h @@ -92,8 +92,8 @@ template<> struct packet_traits<double> : default_packet_traits }; -template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef float4 half; }; -template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef double2 half; }; +template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef float4 half; }; +template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef double2 half; }; template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) { return make_float4(from, from, from, from); diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h index cd518c7e4..2bee56f0f 100644 --- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h +++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h @@ -42,7 +42,7 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits }; }; -template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef half2 half; }; +template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef half2 half; }; template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) { return __half2half2(from); @@ -567,7 +567,7 @@ struct packet_traits<half> : default_packet_traits { }; -template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; enum {size=16, alignment=Aligned32, vectorizable=true}; typedef Packet16h half; }; +template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; typedef uint16_t mask_t; enum {size=16, alignment=Aligned32, vectorizable=true, masked_load_available=true}; typedef Packet16h half; }; template<> EIGEN_STRONG_INLINE Packet16h pset1<Packet16h>(const Eigen::half& from) { Packet16h result; @@ -591,6 +591,14 @@ template<> EIGEN_STRONG_INLINE Packet16h ploadu<Packet16h>(const Eigen::half* fr return result; } +template<> EIGEN_STRONG_INLINE Packet16h ploadu<Packet16h>(const Eigen::half* from, + uint16_t umask) { + __mmask16 mask = static_cast<__mmask16>(umask); + Packet16h result; + result.x = _mm256_maskz_loadu_epi16(mask, from); + return result; +} + template<> EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet16h& from) { // (void*) -> workaround clang warning: // cast from 'Eigen::half *' to '__m256i *' increases required alignment from 2 to 32 @@ -1056,7 +1064,7 @@ struct packet_traits<Eigen::half> : default_packet_traits { }; -template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true}; typedef Packet8h half; }; +template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet8h half; }; template<> EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const Eigen::half& from) { Packet8h result; @@ -1419,7 +1427,7 @@ struct packet_traits<Eigen::half> : default_packet_traits { }; -template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef Packet4h half; }; +template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4h half; }; template<> EIGEN_STRONG_INLINE Packet4h pset1<Packet4h>(const Eigen::half& from) { Packet4h result; diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h index fa64d3564..0ced061c5 100644 --- a/Eigen/src/Core/arch/MSA/Complex.h +++ b/Eigen/src/Core/arch/MSA/Complex.h @@ -127,7 +127,7 @@ struct packet_traits<std::complex<float> > : default_packet_traits { template <> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; - enum { size = 2, alignment = Aligned16, vectorizable=true }; + enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false }; typedef Packet2cf half; }; @@ -500,7 +500,7 @@ struct packet_traits<std::complex<double> > : default_packet_traits { template <> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; - enum { size = 1, alignment = Aligned16, vectorizable=true }; + enum { size = 1, alignment = Aligned16, vectorizable=true, masked_load_available=false }; typedef Packet1cd half; }; diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h index a97156a84..f426d5b83 100644 --- a/Eigen/src/Core/arch/MSA/PacketMath.h +++ b/Eigen/src/Core/arch/MSA/PacketMath.h @@ -117,14 +117,14 @@ struct packet_traits<int32_t> : default_packet_traits { template <> struct unpacket_traits<Packet4f> { typedef float type; - enum { size = 4, alignment = Aligned16, vectorizable=true }; + enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false }; typedef Packet4f half; }; template <> struct unpacket_traits<Packet4i> { typedef int32_t type; - enum { size = 4, alignment = Aligned16, vectorizable=true }; + enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false }; typedef Packet4i half; }; @@ -925,7 +925,7 @@ struct packet_traits<double> : default_packet_traits { template <> struct unpacket_traits<Packet2d> { typedef double type; - enum { size = 2, alignment = Aligned16, vectorizable=true }; + enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false }; typedef Packet2d half; }; diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index f6c5c211c..c17d0a03e 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -62,7 +62,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; }; +template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) { @@ -340,7 +340,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; }; +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index e8b351849..b8051cfbf 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -145,13 +145,13 @@ template<> struct unpacket_traits<Packet4f> typedef float type; typedef Packet4f half; typedef Packet4i integer_packet; - enum {size=4, alignment=Aligned16, vectorizable=true}; + enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; }; template<> struct unpacket_traits<Packet4i> { typedef int32_t type; typedef Packet4i half; - enum {size=4, alignment=Aligned16, vectorizable=true}; + enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; }; template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); } @@ -657,7 +657,7 @@ template<> struct packet_traits<double> : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2d half; }; +template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; }; template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); } diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index f39988eac..7d89c323b 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -50,7 +50,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits }; #endif -template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; }; +template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); } @@ -283,7 +283,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits }; #endif -template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; }; +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 9c3750af0..04b63601b 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -166,17 +166,17 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; typedef Packet4f half; typedef Packet4i integer_packet; - enum {size=4, alignment=Aligned16, vectorizable=true}; + enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; }; template<> struct unpacket_traits<Packet2d> { typedef double type; typedef Packet2d half; - enum {size=2, alignment=Aligned16, vectorizable=true}; + enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; }; template<> struct unpacket_traits<Packet4i> { typedef int type; typedef Packet4i half; - enum {size=4, alignment=Aligned16, vectorizable=false}; + enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false}; }; #ifndef EIGEN_VECTORIZE_AVX diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h index 294cb101a..1afa63bb2 100644 --- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h +++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h @@ -88,7 +88,7 @@ SYCL_ARITHMETIC(cl::sycl::cl_double2) #define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\ template<> struct unpacket_traits<packet_type> {\ typedef unpacket_type type;\ - enum {size=lengths, alignment=Aligned16, vectorizable=true};\ + enum {size=lengths, alignment=Aligned16, vectorizable=true, masked_load_available=false};\ typedef packet_type half;\ }; SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4) diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h index 167c3ee4c..9fcbcb82d 100644 --- a/Eigen/src/Core/arch/ZVector/Complex.h +++ b/Eigen/src/Core/arch/ZVector/Complex.h @@ -91,8 +91,8 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits }; }; -template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; }; -template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; }; +template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; }; +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; }; /* Forward declaration */ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel); diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h index c8e90f1a8..74e0a1313 100755 --- a/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -239,9 +239,9 @@ template<> struct packet_traits<double> : default_packet_traits }; }; -template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef Packet4i half; }; -template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef Packet4f half; }; -template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2d half; }; +template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4i half; }; +template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4f half; }; +template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; }; /* Forward declaration */ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel); diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 91c2e42e4..ce019946b 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -185,7 +185,8 @@ template<typename T> struct unpacket_traits { size = 1, alignment = 1, - vectorizable = false + vectorizable = false, + masked_load_available=false }; }; |