about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/AVX/Complex.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/arch/AVX/Complex.h')
-rw-r--r-- Eigen/src/Core/arch/AVX/Complex.h 25
1 files changed, 25 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 4f4f6eb52..0121cec86 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -404,6 +404,31 @@ template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x
return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
}
+// In-place 4x4 transpose of the four packets in `kernel`, moving data as 64-bit lanes (presumably one complex<float> each).
+template<> EIGEN_DEVICE_FUNC inline void ptranspose(Kernel<Packet4cf>& kernel) {
+ const __m256d row0 = _mm256_castps_pd(kernel.packet[0].v);
+ const __m256d row1 = _mm256_castps_pd(kernel.packet[1].v);
+ const __m256d row2 = _mm256_castps_pd(kernel.packet[2].v);
+ const __m256d row3 = _mm256_castps_pd(kernel.packet[3].v);
+ // Interleave row pairs: mask 0x0 takes the even 64-bit lane of every 128-bit half, mask 0xF the odd lane.
+ const __m256d even01 = _mm256_shuffle_pd(row0, row1, 0x0);
+ const __m256d odd01 = _mm256_shuffle_pd(row0, row1, 0xF);
+ const __m256d even23 = _mm256_shuffle_pd(row2, row3, 0x0);
+ const __m256d odd23 = _mm256_shuffle_pd(row2, row3, 0xF);
+ // Stitch 128-bit halves together: selector 0x20 joins the two low halves, 0x31 the two high halves.
+ kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(even01, even23, 0x20));
+ kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(odd01, odd23, 0x20));
+ kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(even01, even23, 0x31));
+ kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(odd01, odd23, 0x31));
+}
+
+// In-place 2x2 transpose of 128-bit halves: packet[0] gets both low halves (0x20), packet[1] both high halves (0x31).
+template<> EIGEN_DEVICE_FUNC inline void ptranspose(Kernel<Packet2cd>& kernel) {
+ const __m256d row0 = kernel.packet[0].v;  // snapshot row 0: packet[0] is overwritten before row 1 is rebuilt
+ kernel.packet[0].v = _mm256_permute2f128_pd(row0, kernel.packet[1].v, 0x20);
+ kernel.packet[1].v = _mm256_permute2f128_pd(row0, kernel.packet[1].v, 0x31);
+}
+
} // end namespace internal
} // end namespace Eigen