From a419cea4a0ff545f3221020119d5eb6ab4cd3e48 Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Wed, 26 Mar 2014 19:03:07 -0700
Subject: Created the ptranspose packet primitive that can transpose an array of N packets, where N is the number of words in each packet. This primitive will be used to complete the vectorization of the gemm_pack_lhs and gemm_pack_rhs functions. Implemented the primitive using SSE instructions.
---
 Eigen/src/Core/arch/SSE/Complex.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'Eigen/src/Core/arch/SSE/Complex.h')

diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 91bba5e38..2dce66819 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -435,6 +435,16 @@ EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
   return Packet1cd(preverse(x.v));
 }
 
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(Kernel<Packet2cf>& kernel) {
+  __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
+  __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
+
+  __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
+  kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
+  kernel.packet[1].v = tmp;
+}
+
 } // end namespace internal
 
 } // end namespace Eigen
-- 
cgit v1.2.3