aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/GenericPacketMath.h8
-rw-r--r--Eigen/src/Core/arch/AVX/Complex.h4
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h15
-rw-r--r--Eigen/src/Core/arch/AVX512/Complex.h6
-rw-r--r--Eigen/src/Core/arch/AVX512/PacketMath.h13
-rw-r--r--Eigen/src/Core/arch/AltiVec/Complex.h4
-rwxr-xr-xEigen/src/Core/arch/AltiVec/PacketMath.h6
-rw-r--r--Eigen/src/Core/arch/GPU/PacketMath.h4
-rw-r--r--Eigen/src/Core/arch/GPU/PacketMathHalf.h16
-rw-r--r--Eigen/src/Core/arch/MSA/Complex.h4
-rw-r--r--Eigen/src/Core/arch/MSA/PacketMath.h6
-rw-r--r--Eigen/src/Core/arch/NEON/Complex.h4
-rw-r--r--Eigen/src/Core/arch/NEON/PacketMath.h6
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h4
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h6
-rw-r--r--Eigen/src/Core/arch/SYCL/InteropHeaders.h2
-rw-r--r--Eigen/src/Core/arch/ZVector/Complex.h4
-rwxr-xr-xEigen/src/Core/arch/ZVector/PacketMath.h6
-rw-r--r--Eigen/src/Core/util/XprHelper.h3
-rw-r--r--test/packetmath.cpp23
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h24
21 files changed, 125 insertions, 43 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 04a321b9f..3bba02278 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -287,6 +287,14 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
+/** \internal \returns a packet version of \a *from, (un-aligned masked load)
+ * There is no generic implementation. We only have implementations for specialized
+ * cases. Generic case should not be called.
+ */
+template<typename Packet> EIGEN_DEVICE_FUNC inline
+typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type
+ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
+
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 5b8ff59bd..3d229cd81 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -47,7 +47,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
#endif
-template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32, vectorizable=true}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
@@ -263,7 +263,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
#endif
-template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32, vectorizable=true}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index f88e36024..3f94f85eb 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -117,14 +117,15 @@ template<> struct unpacket_traits<Packet8f> {
typedef float type;
typedef Packet4f half;
typedef Packet8i integer_packet;
- enum {size=8, alignment=Aligned32, vectorizable=true};
+ typedef uint8_t mask_t;
+ enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true};
};
template<> struct unpacket_traits<Packet4d> {
typedef double type;
typedef Packet2d half;
- enum {size=4, alignment=Aligned32, vectorizable=true};
+ enum {size=4, alignment=Aligned32, vectorizable=true, masked_load_available=false};
};
-template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false}; };
+template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false, masked_load_available=false}; };
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
@@ -385,6 +386,14 @@ template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EI
template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
+template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from, uint8_t umask) {
+ __m256i mask = _mm256_set1_epi8(static_cast<char>(umask));
+ const __m256i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);
+ mask = _mm256_or_si256(mask, bit_mask);
+ mask = _mm256_cmpeq_epi32(mask, _mm256_set1_epi32(0xffffffff));
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_maskload_ps(from, mask);
+}
+
// Loads 4 floats from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, a3}
template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
{
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index 9a89dd01f..5ab2ffe77 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -51,7 +51,8 @@ template<> struct unpacket_traits<Packet8cf> {
enum {
size = 8,
alignment=unpacket_traits<Packet16f>::alignment,
- vectorizable=true
+ vectorizable=true,
+ masked_load_available=false
};
typedef Packet4cf half;
};
@@ -247,7 +248,8 @@ template<> struct unpacket_traits<Packet4cd> {
enum {
size = 4,
alignment = unpacket_traits<Packet8d>::alignment,
- vectorizable=true
+ vectorizable=true,
+ masked_load_available=false
};
typedef Packet2cd half;
};
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 60b723b08..094309eda 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -102,19 +102,20 @@ struct unpacket_traits<Packet16f> {
typedef float type;
typedef Packet8f half;
typedef Packet16i integer_packet;
- enum { size = 16, alignment=Aligned64, vectorizable=true };
+ typedef uint16_t mask_t;
+ enum { size = 16, alignment=Aligned64, vectorizable=true, masked_load_available=true };
};
template <>
struct unpacket_traits<Packet8d> {
typedef double type;
typedef Packet4d half;
- enum { size = 8, alignment=Aligned64, vectorizable=true };
+ enum { size = 8, alignment=Aligned64, vectorizable=true, masked_load_available=false };
};
template <>
struct unpacket_traits<Packet16i> {
typedef int type;
typedef Packet8i half;
- enum { size = 16, alignment=Aligned64, vectorizable=false };
+ enum { size = 16, alignment=Aligned64, vectorizable=false, masked_load_available=false };
};
template <>
@@ -485,6 +486,12 @@ EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
reinterpret_cast<const __m512i*>(from));
}
+template <>
+EIGEN_STRONG_INLINE Packet16f ploadu<Packet16f>(const float* from, uint16_t umask) {
+ __mmask16 mask = static_cast<__mmask16>(umask);
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_maskz_loadu_ps(mask, from);
+}
+
// Loads 8 floats from memory a returns the packet
// {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7}
template <>
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 0c9f8e05d..ebc3b2aeb 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -60,7 +60,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -291,7 +291,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 9535724eb..b5484e6aa 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -192,13 +192,13 @@ template<> struct unpacket_traits<Packet4f>
typedef float type;
typedef Packet4f half;
typedef Packet4i integer_packet;
- enum {size=4, alignment=Aligned16, vectorizable=true};
+ enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false};
};
template<> struct unpacket_traits<Packet4i>
{
typedef int type;
typedef Packet4i half;
- enum {size=4, alignment=Aligned16, vectorizable=false};
+ enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false};
};
inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
@@ -921,7 +921,7 @@ template<> struct packet_traits<double> : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; };
inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
{
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index cd4615a45..7fac0a5e1 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -92,8 +92,8 @@ template<> struct packet_traits<double> : default_packet_traits
};
-template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef float4 half; };
-template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef double2 half; };
+template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef float4 half; };
+template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef double2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
return make_float4(from, from, from, from);
diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
index cd518c7e4..2bee56f0f 100644
--- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
@@ -42,7 +42,7 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
};
};
-template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef half2 half; };
+template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef half2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
return __half2half2(from);
@@ -567,7 +567,7 @@ struct packet_traits<half> : default_packet_traits {
};
-template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; enum {size=16, alignment=Aligned32, vectorizable=true}; typedef Packet16h half; };
+template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; typedef uint16_t mask_t; enum {size=16, alignment=Aligned32, vectorizable=true, masked_load_available=true}; typedef Packet16h half; };
template<> EIGEN_STRONG_INLINE Packet16h pset1<Packet16h>(const Eigen::half& from) {
Packet16h result;
@@ -591,6 +591,14 @@ template<> EIGEN_STRONG_INLINE Packet16h ploadu<Packet16h>(const Eigen::half* fr
return result;
}
+template<> EIGEN_STRONG_INLINE Packet16h ploadu<Packet16h>(const Eigen::half* from,
+ uint16_t umask) {
+ __mmask16 mask = static_cast<__mmask16>(umask);
+ Packet16h result;
+ result.x = _mm256_maskz_loadu_epi16(mask, from);
+ return result;
+}
+
template<> EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet16h& from) {
// (void*) -> workaround clang warning:
// cast from 'Eigen::half *' to '__m256i *' increases required alignment from 2 to 32
@@ -1056,7 +1064,7 @@ struct packet_traits<Eigen::half> : default_packet_traits {
};
-template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true}; typedef Packet8h half; };
+template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet8h half; };
template<> EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const Eigen::half& from) {
Packet8h result;
@@ -1419,7 +1427,7 @@ struct packet_traits<Eigen::half> : default_packet_traits {
};
-template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef Packet4h half; };
+template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4h half; };
template<> EIGEN_STRONG_INLINE Packet4h pset1<Packet4h>(const Eigen::half& from) {
Packet4h result;
diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h
index fa64d3564..0ced061c5 100644
--- a/Eigen/src/Core/arch/MSA/Complex.h
+++ b/Eigen/src/Core/arch/MSA/Complex.h
@@ -127,7 +127,7 @@ struct packet_traits<std::complex<float> > : default_packet_traits {
template <>
struct unpacket_traits<Packet2cf> {
typedef std::complex<float> type;
- enum { size = 2, alignment = Aligned16, vectorizable=true };
+ enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false };
typedef Packet2cf half;
};
@@ -500,7 +500,7 @@ struct packet_traits<std::complex<double> > : default_packet_traits {
template <>
struct unpacket_traits<Packet1cd> {
typedef std::complex<double> type;
- enum { size = 1, alignment = Aligned16, vectorizable=true };
+ enum { size = 1, alignment = Aligned16, vectorizable=true, masked_load_available=false };
typedef Packet1cd half;
};
diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h
index a97156a84..f426d5b83 100644
--- a/Eigen/src/Core/arch/MSA/PacketMath.h
+++ b/Eigen/src/Core/arch/MSA/PacketMath.h
@@ -117,14 +117,14 @@ struct packet_traits<int32_t> : default_packet_traits {
template <>
struct unpacket_traits<Packet4f> {
typedef float type;
- enum { size = 4, alignment = Aligned16, vectorizable=true };
+ enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false };
typedef Packet4f half;
};
template <>
struct unpacket_traits<Packet4i> {
typedef int32_t type;
- enum { size = 4, alignment = Aligned16, vectorizable=true };
+ enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false };
typedef Packet4i half;
};
@@ -925,7 +925,7 @@ struct packet_traits<double> : default_packet_traits {
template <>
struct unpacket_traits<Packet2d> {
typedef double type;
- enum { size = 2, alignment = Aligned16, vectorizable=true };
+ enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false };
typedef Packet2d half;
};
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index f6c5c211c..c17d0a03e 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -62,7 +62,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -340,7 +340,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index e8b351849..b8051cfbf 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -145,13 +145,13 @@ template<> struct unpacket_traits<Packet4f>
typedef float type;
typedef Packet4f half;
typedef Packet4i integer_packet;
- enum {size=4, alignment=Aligned16, vectorizable=true};
+ enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false};
};
template<> struct unpacket_traits<Packet4i>
{
typedef int32_t type;
typedef Packet4i half;
- enum {size=4, alignment=Aligned16, vectorizable=true};
+ enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false};
};
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
@@ -657,7 +657,7 @@ template<> struct packet_traits<double> : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; };
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index f39988eac..7d89c323b 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -50,7 +50,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
#endif
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
@@ -283,7 +283,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
#endif
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 9c3750af0..04b63601b 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -166,17 +166,17 @@ template<> struct unpacket_traits<Packet4f> {
typedef float type;
typedef Packet4f half;
typedef Packet4i integer_packet;
- enum {size=4, alignment=Aligned16, vectorizable=true};
+ enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false};
};
template<> struct unpacket_traits<Packet2d> {
typedef double type;
typedef Packet2d half;
- enum {size=2, alignment=Aligned16, vectorizable=true};
+ enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false};
};
template<> struct unpacket_traits<Packet4i> {
typedef int type;
typedef Packet4i half;
- enum {size=4, alignment=Aligned16, vectorizable=false};
+ enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false};
};
#ifndef EIGEN_VECTORIZE_AVX
diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
index 294cb101a..1afa63bb2 100644
--- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h
+++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
@@ -88,7 +88,7 @@ SYCL_ARITHMETIC(cl::sycl::cl_double2)
#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\
template<> struct unpacket_traits<packet_type> {\
typedef unpacket_type type;\
- enum {size=lengths, alignment=Aligned16, vectorizable=true};\
+ enum {size=lengths, alignment=Aligned16, vectorizable=true, masked_load_available=false};\
typedef packet_type half;\
};
SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4)
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index 167c3ee4c..9fcbcb82d 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -91,8 +91,8 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; };
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet1cd half; };
/* Forward declaration */
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
index c8e90f1a8..74e0a1313 100755
--- a/Eigen/src/Core/arch/ZVector/PacketMath.h
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -239,9 +239,9 @@ template<> struct packet_traits<double> : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef Packet4i half; };
-template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef Packet4f half; };
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4i half; };
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false}; typedef Packet2d half; };
/* Forward declaration */
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 91c2e42e4..ce019946b 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -185,7 +185,8 @@ template<typename T> struct unpacket_traits
{
size = 1,
alignment = 1,
- vectorizable = false
+ vectorizable = false,
+ masked_load_available=false
};
};
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 4906f6eb0..200670b8c 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -119,6 +119,9 @@ struct packet_helper
inline Packet load(const T* from) const { return internal::pload<Packet>(from); }
template<typename T>
+ inline Packet load(const T* from, unsigned long long umask) const { return internal::ploadu<Packet>(from, umask); }
+
+ template<typename T>
inline void store(T* to, const Packet& x) const { internal::pstore(to,x); }
};
@@ -129,6 +132,9 @@ struct packet_helper<false,Packet>
inline T load(const T* from) const { return *from; }
template<typename T>
+ inline T load(const T* from, unsigned long long) const { return *from; }
+
+ template<typename T>
inline void store(T* to, const T& x) const { *to = x; }
};
@@ -169,6 +175,7 @@ template<typename Scalar,typename Packet> void packetmath()
const int size = PacketSize*max_size;
EIGEN_ALIGN_MAX Scalar data1[size];
EIGEN_ALIGN_MAX Scalar data2[size];
+ EIGEN_ALIGN_MAX Scalar data3[size];
EIGEN_ALIGN_MAX Packet packets[PacketSize*2];
EIGEN_ALIGN_MAX Scalar ref[size];
RealScalar refvalue = RealScalar(0);
@@ -194,6 +201,22 @@ template<typename Scalar,typename Packet> void packetmath()
VERIFY(areApprox(data1, data2+offset, PacketSize) && "internal::pstoreu");
}
+ if (internal::unpacket_traits<Packet>::masked_load_available)
+ {
+ unsigned long long max_umask = (0x1ull << PacketSize);
+ for (int offset=0; offset<PacketSize; ++offset)
+ {
+ for (unsigned long long umask=0; umask<max_umask; ++umask)
+ {
+ packet_helper<internal::unpacket_traits<Packet>::masked_load_available, Packet> h;
+ h.store(data2, h.load(data1+offset, umask));
+ for (int k=0; k<PacketSize; ++k)
+ data3[k] = ((umask & ( 0x1ull << k )) >> k) ? data1[k+offset] : Scalar(0);
+ VERIFY(areApprox(data3, data2, PacketSize) && "internal::ploadu masked");
+ }
+ }
+ }
+
for (int offset=0; offset<PacketSize; ++offset)
{
#define MIN(A,B) (A<B?A:B)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 4ca6b3d8c..05c684ff9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -95,6 +95,18 @@ struct TensorEvaluator
return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
}
+ // Return a packet starting at `index` where `umask` specifies which elements
+ // have to be loaded. Type/size of mask depends on PacketReturnType, e.g. for
+ // Packet16f, `umask` is of type uint16_t and if a bit is 1, corresponding
+ // float element will be loaded, otherwise 0 will be loaded.
+ // Function has been templatized to enable Sfinae.
+ template <typename PacketReturnTypeT = PacketReturnType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
+ partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
+ {
+ return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
+ }
+
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x)
{
@@ -244,6 +256,18 @@ struct TensorEvaluator<const Derived, Device>
return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
}
+ // Return a packet starting at `index` where `umask` specifies which elements
+ // have to be loaded. Type/size of mask depends on PacketReturnType, e.g. for
+ // Packet16f, `umask` is of type uint16_t and if a bit is 1, corresponding
+ // float element will be loaded, otherwise 0 will be loaded.
+ // Function has been templatized to enable Sfinae.
+ template <typename PacketReturnTypeT = PacketReturnType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
+ partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
+ {
+ return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
eigen_assert(m_data);
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)