about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen
diff options
context:
space:
mode:
author Rasmus Munk Larsen <rmlarsen@google.com> 2019-01-11 10:28:52 -0800
committer Rasmus Munk Larsen <rmlarsen@google.com> 2019-01-11 10:28:52 -0800
commit 9396ace46b4638a5946816cacccc565288e6a859 (patch)
tree 0b2bb8c7c5496e836c948a8a19e1659e1d2c7a7a /Eigen
parent e9936cf2b9665fab9f9aeb87a8fd54474d7ce32a (diff)
parent 74882471d0012596e78bffa7ad4e74d940be5059 (diff)
Merge.
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/src/Core/arch/AVX512/Complex.h | 32
-rw-r--r-- Eigen/src/Core/arch/GPU/PacketMathHalf.h | 13
2 files changed, 43 insertions, 2 deletions
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index b7e68d2ab..f2034a713 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -406,12 +406,40 @@ template<> EIGEN_STRONG_INLINE Packet4cd pcplxflip<Packet4cd>(const Packet4cd& x
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8cf,4>& kernel) {
- ptranspose(reinterpret_cast<PacketBlock<Packet8d,4>&>(kernel));
+ PacketBlock<Packet8d,4> pb;
+
+ pb.packet[0] = _mm512_castps_pd(kernel.packet[0].v);
+ pb.packet[1] = _mm512_castps_pd(kernel.packet[1].v);
+ pb.packet[2] = _mm512_castps_pd(kernel.packet[2].v);
+ pb.packet[3] = _mm512_castps_pd(kernel.packet[3].v);
+ ptranspose(pb);
+ kernel.packet[0].v = _mm512_castpd_ps(pb.packet[0]);
+ kernel.packet[1].v = _mm512_castpd_ps(pb.packet[1]);
+ kernel.packet[2].v = _mm512_castpd_ps(pb.packet[2]);
+ kernel.packet[3].v = _mm512_castpd_ps(pb.packet[3]);
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8cf,8>& kernel) {
- ptranspose(reinterpret_cast<PacketBlock<Packet8d,8>&>(kernel));
+ PacketBlock<Packet8d,8> pb;
+
+ pb.packet[0] = _mm512_castps_pd(kernel.packet[0].v);
+ pb.packet[1] = _mm512_castps_pd(kernel.packet[1].v);
+ pb.packet[2] = _mm512_castps_pd(kernel.packet[2].v);
+ pb.packet[3] = _mm512_castps_pd(kernel.packet[3].v);
+ pb.packet[4] = _mm512_castps_pd(kernel.packet[4].v);
+ pb.packet[5] = _mm512_castps_pd(kernel.packet[5].v);
+ pb.packet[6] = _mm512_castps_pd(kernel.packet[6].v);
+ pb.packet[7] = _mm512_castps_pd(kernel.packet[7].v);
+ ptranspose(pb);
+ kernel.packet[0].v = _mm512_castpd_ps(pb.packet[0]);
+ kernel.packet[1].v = _mm512_castpd_ps(pb.packet[1]);
+ kernel.packet[2].v = _mm512_castpd_ps(pb.packet[2]);
+ kernel.packet[3].v = _mm512_castpd_ps(pb.packet[3]);
+ kernel.packet[4].v = _mm512_castpd_ps(pb.packet[4]);
+ kernel.packet[5].v = _mm512_castpd_ps(pb.packet[5]);
+ kernel.packet[6].v = _mm512_castpd_ps(pb.packet[6]);
+ kernel.packet[7].v = _mm512_castpd_ps(pb.packet[7]);
}
EIGEN_DEVICE_FUNC inline void
diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
index 00e40d40b..85a32a18d 100644
--- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
@@ -143,7 +143,11 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs<half2>(const half2&
return result;
}
+<<<<<<< working copy
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ptrue<half2>(const half2& a) {
+=======
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pones<half2>(const half2& a) {
+>>>>>>> merge rev
half2 result;
*(reinterpret_cast<unsigned*>(&(result))) = 0xffffffffu;
}
@@ -648,8 +652,13 @@ template<> EIGEN_STRONG_INLINE Packet16h pnot(const Packet16h& a) {
Packet16h r; r.x = _mm256_xor_si256(a.x, pcmp_eq(a.x, a.x)); return r;
}
+<<<<<<< working copy
template<> EIGEN_STRONG_INLINE Packet16h ptrue(const Packet16h& a) {
Packet16h r; r.x = Packet8i(ptrue(a.x)); return r;
+=======
+template<> EIGEN_STRONG_INLINE Packet16h pones(const Packet16h& a) {
+ Packet16h r; r.x = Packet8i(pones(a.x)); return r;
+>>>>>>> merge rev
}
template<> EIGEN_STRONG_INLINE Packet16h por(const Packet16h& a,const Packet16h& b) {
@@ -1097,7 +1106,11 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
#endif
}
+<<<<<<< working copy
template<> EIGEN_STRONG_INLINE Packet8h ptrue(const Packet8h& a) {
+=======
+template<> EIGEN_STRONG_INLINE Packet8h pones(const Packet8h& a) {
+>>>>>>> merge rev
Packet8h r; r.x = _mm_cmpeq_epi32(a.x, a.x); return r;
}