diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-03-26 19:03:07 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-03-26 19:03:07 -0700 |
commit | a419cea4a0ff545f3221020119d5eb6ab4cd3e48 (patch) | |
tree | 2ea3a4e92a767bd92003b23235afcc936df32553 /Eigen/src/Core/arch/SSE/Complex.h | |
parent | 14bc4b9704b7e347ffcfe3c52588790e27e5118b (diff) |
Created the ptranspose packet primitive that can transpose an array of N packets, where N is the number of words in each packet. This primitive will be used to complete the vectorization of the gemm_pack_lhs and gemm_pack_rhs functions.
Implemented the primitive using SSE instructions.
Diffstat (limited to 'Eigen/src/Core/arch/SSE/Complex.h')
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 10 |
1 file changed, 10 insertions, 0 deletions
```diff
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 91bba5e38..2dce66819 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -435,6 +435,16 @@ EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
   return Packet1cd(preverse(x.v));
 }
 
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(Kernel<Packet2cf>& kernel) {
+  __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
+  __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
+
+  __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
+  kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
+  kernel.packet[1].v = tmp;
+}
+
 } // end namespace internal
 
 } // end namespace Eigen
```