aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE/Complex.h
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-03-26 19:03:07 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-03-26 19:03:07 -0700
commita419cea4a0ff545f3221020119d5eb6ab4cd3e48 (patch)
tree2ea3a4e92a767bd92003b23235afcc936df32553 /Eigen/src/Core/arch/SSE/Complex.h
parent14bc4b9704b7e347ffcfe3c52588790e27e5118b (diff)
Created the ptranspose packet primitive that can transpose an array of N packets, where N is the number of words in each packet. This primitive will be used to complete the vectorization of the gemm_pack_lhs and gemm_pack_rhs functions.
Implemented the primitive using SSE instructions.
Diffstat (limited to 'Eigen/src/Core/arch/SSE/Complex.h')
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 91bba5e38..2dce66819 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -435,6 +435,16 @@ EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
return Packet1cd(preverse(x.v));
}
+template<> EIGEN_DEVICE_FUNC inline void
+ptranspose(Kernel<Packet2cf>& kernel) {
+ // View each 128-bit register as two 64-bit lanes so the unpack moves whole (real, imag) pairs.
+ const __m128d row0 = _mm_castps_pd(kernel.packet[0].v);
+ const __m128d row1 = _mm_castps_pd(kernel.packet[1].v);
+ // In-place 2x2 transpose: low lanes of both inputs form packet[0], high lanes form packet[1].
+ kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(row0, row1));
+ kernel.packet[1].v = _mm_castpd_ps(_mm_unpackhi_pd(row0, row1));
+}
+
} // end namespace internal
} // end namespace Eigen