diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-03-27 09:34:51 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-03-27 09:34:51 -0700 |
commit | c4902a3d0182dfc9ac02a24ec2a52cd567ac0104 (patch) | |
tree | ab2c8d944d37c1d32e1da6eb3bd4929d4588e024 /Eigen/src/Core/arch/AVX/Complex.h | |
parent | 7d73c7f18be20407ffd28f72b3275da233658f80 (diff) |
Implemented the AVX version of the ptranspose packet primitive.
Diffstat (limited to 'Eigen/src/Core/arch/AVX/Complex.h')
-rw-r--r-- | Eigen/src/Core/arch/AVX/Complex.h | 25 |
1 file changed, 25 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 4f4f6eb52..0121cec86 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -404,6 +404,31 @@ template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x
   return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
 }
 
+template<> EIGEN_DEVICE_FUNC inline void  // In-place transpose of a block of four Packet4cf rows.
+ptranspose(Kernel<Packet4cf>& kernel) {   // Reads kernel.packet[0..3] first, then overwrites all four.
+  __m256d P0 = _mm256_castps_pd(kernel.packet[0].v);  // Bit-cast each row to 4 x 64-bit lanes so that a pair of
+  __m256d P1 = _mm256_castps_pd(kernel.packet[1].v);  // adjacent floats moves as one unit (presumably one
+  __m256d P2 = _mm256_castps_pd(kernel.packet[2].v);  // complex<float>, real+imag; confirm against the
+  __m256d P3 = _mm256_castps_pd(kernel.packet[3].v);  // Packet4cf definition, which is not shown here).
+
+  __m256d T0 = _mm256_shuffle_pd(P0, P1, 15);  // mask 0b1111, odd lanes:  [P0[1], P1[1], P0[3], P1[3]]
+  __m256d T1 = _mm256_shuffle_pd(P0, P1, 0);   // mask 0b0000, even lanes: [P0[0], P1[0], P0[2], P1[2]]
+  __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);  // odd lanes:  [P2[1], P3[1], P2[3], P3[3]]
+  __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);   // even lanes: [P2[0], P3[0], P2[2], P3[2]]
+
+  kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));  // 0x20, low 128-bit halves:  lane 1 of P0..P3
+  kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));  // 0x31, high 128-bit halves: lane 3 of P0..P3
+  kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));  // 0x20, low 128-bit halves:  lane 0 of P0..P3
+  kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));  // 0x31, high 128-bit halves: lane 2 of P0..P3
+}
+
+template<> EIGEN_DEVICE_FUNC inline void  // In-place transpose of a block of two Packet2cd rows.
+ptranspose(Kernel<Packet2cd>& kernel) {   // Moves whole 128-bit halves (presumably one complex<double> each; confirm).
+  __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));  // 0x20: [low half of packet[0], low half of packet[1]]; kept in tmp because packet[0] is read again below
+  kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));  // 0x31: [high half of packet[0], high half of packet[1]]
+  kernel.packet[0].v = tmp;  // written last, after both original rows have been consumed
+}
+
 } // end namespace internal } // end namespace Eigen |