aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rwxr-xr-x Eigen/src/Core/GenericPacketMath.h 15
-rw-r--r-- Eigen/src/Core/arch/SSE/Complex.h 10
-rwxr-xr-x Eigen/src/Core/arch/SSE/PacketMath.h 25
-rw-r--r-- test/packetmath.cpp 12
4 files changed, 61 insertions, 1 deletion
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index d07541285..f9ddf4718 100755
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -386,9 +386,22 @@ template<> inline std::complex<double> pmul(const std::complex<double>& a, const
#endif
+
+/***************************************************************************
+ * Kernel: a collection of N packets, where N is the number of coefficients
+ * held by a single packet (unpacket_traits<Packet>::size). Taken together the
+ * N packets form an N x N block of coefficients; ptranspose() takes a Kernel
+ * by reference and transposes that block in place.
+***************************************************************************/
+template <typename Packet> struct Kernel {
+ Packet packet[unpacket_traits<Packet>::size];
+};
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline void
+ptranspose(Kernel<Packet>& /*kernel*/) {
+ /* Generic fallback: deliberately empty. Nothing to do in the scalar case,
+  * i.e. a 1x1 matrix, which is its own transpose.
+  * NOTE(review): a packet type that lacks its own specialization would also
+  * resolve to this overload and be left untransposed -- confirm that every
+  * vectorized packet type provides one. */
+}
+
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_GENERIC_PACKET_MATH_H
-
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 91bba5e38..2dce66819 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -435,6 +435,16 @@ EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
return Packet1cd(preverse(x.v));
}
+// Transposes, in place, the 2x2 block stored in the two Packet2cf rows of
+// `kernel`. Each 128-bit row is reinterpreted as two 64-bit lanes (presumably
+// one lane per complex<float> value), so the double-precision unpack
+// intrinsics can move whole lanes without touching their contents.
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(Kernel<Packet2cf>& kernel) {
+ const __m128d row0 = _mm_castps_pd(kernel.packet[0].v);
+ const __m128d row1 = _mm_castps_pd(kernel.packet[1].v);
+
+ // Low lanes of both rows make up the first output row, high lanes the second.
+ const __m128d col0 = _mm_unpacklo_pd(row0, row1);
+ const __m128d col1 = _mm_unpackhi_pd(row0, row1);
+
+ kernel.packet[0].v = _mm_castpd_ps(col0);
+ kernel.packet[1].v = _mm_castpd_ps(col1);
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 9d8faa7d6..937f63f88 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -707,6 +707,31 @@ struct palign_impl<Offset,Packet2d>
};
#endif
+// Transposes, in place, the 4x4 block stored in the four Packet4f rows of
+// `kernel`.
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(Kernel<Packet4f>& kernel) {
+ // _MM_TRANSPOSE4_PS overwrites its four arguments, so bind them by reference
+ // to make the macro write straight back into the kernel.
+ Packet4f& row0 = kernel.packet[0];
+ Packet4f& row1 = kernel.packet[1];
+ Packet4f& row2 = kernel.packet[2];
+ Packet4f& row3 = kernel.packet[3];
+ _MM_TRANSPOSE4_PS(row0, row1, row2, row3);
+}
+
+// Transposes, in place, the 2x2 block stored in the two Packet2d rows of
+// `kernel`.
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(Kernel<Packet2d>& kernel) {
+ // Snapshot both rows first so neither store can disturb a pending read.
+ const Packet2d row0 = kernel.packet[0];
+ const Packet2d row1 = kernel.packet[1];
+
+ // Low lanes of both rows form the first output row, high lanes the second.
+ kernel.packet[0] = _mm_unpacklo_pd(row0, row1);
+ kernel.packet[1] = _mm_unpackhi_pd(row0, row1);
+}
+
+// Transposes, in place, the 4x4 block of 32-bit integers stored in the four
+// Packet4i rows of `kernel`, using two rounds of unpack (interleave) operations.
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(Kernel<Packet4i>& kernel) {
+ Packet4i* const rows = kernel.packet;
+
+ // Round 1: interleave the 32-bit lanes of rows (0,1) and of rows (2,3).
+ // With rows a,b,c,d: lo01 = [a0 b0 a1 b1], lo23 = [c0 d0 c1 d1], and the
+ // hi variants hold lanes 2 and 3 in the same arrangement.
+ const __m128i lo01 = _mm_unpacklo_epi32(rows[0], rows[1]);
+ const __m128i hi01 = _mm_unpackhi_epi32(rows[0], rows[1]);
+ const __m128i lo23 = _mm_unpacklo_epi32(rows[2], rows[3]);
+ const __m128i hi23 = _mm_unpackhi_epi32(rows[2], rows[3]);
+
+ // Round 2: stitch matching 64-bit halves together, e.g. [a0 b0 c0 d0].
+ rows[0] = _mm_unpacklo_epi64(lo01, lo23);
+ rows[1] = _mm_unpackhi_epi64(lo01, lo23);
+ rows[2] = _mm_unpacklo_epi64(hi01, hi23);
+ rows[3] = _mm_unpackhi_epi64(hi01, hi23);
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 5a680d1ee..735af7017 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -208,6 +208,18 @@ template<typename Scalar> void packetmath()
ref[i] = data1[PacketSize-i-1];
internal::pstore(data2, internal::preverse(internal::pload<Packet>(data1)));
VERIFY(areApprox(ref, data2, PacketSize) && "internal::preverse");
+
+ internal::Kernel<Packet> kernel;
+ for (int i=0; i<PacketSize; ++i) {
+ kernel.packet[i] = internal::pload<Packet>(data1+i*PacketSize);
+ }
+ ptranspose(kernel);
+ for (int i=0; i<PacketSize; ++i) {
+ internal::pstore(data2, kernel.packet[i]);
+ for (int j = 0; j < PacketSize; ++j) {
+ VERIFY(isApproxAbs(data2[j], data1[i+j*PacketSize], refvalue));
+ }
+ }
}
template<typename Scalar> void packetmath_real()