about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/AVX
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2014-03-25 09:00:43 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2014-03-25 09:00:43 -0700
commit: 6bf3cc2732eebff73dd7fadcd8ac421f22381baf (patch)
tree: a0d488a38e1b2cc3e9881fa462c342078097f14c /Eigen/src/Core/arch/AVX
parent: 7ae9b0805dbb218506a462c06263bd67f046366b (diff)
Use AVX instructions to vectorize pset1<Packet2cd>, pset1<Packet4cf>, preverse<Packet2cd>, and preverse<Packet4cf>
Diffstat (limited to 'Eigen/src/Core/arch/AVX')
-rw-r--r-- Eigen/src/Core/arch/AVX/Complex.h 40
1 file changed, 16 insertions, 24 deletions
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 9fb44ecab..17c32d79c 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -76,11 +76,9 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<fl
template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
{
- __m256 result;
- for (int i = 0; i < 8; i+=2) {
- result[i] = std::real(from);
- result[i+1] = std::imag(from);
- }
+ const float r = std::real(from);
+ const float i = std::imag(from);
+ const __m256 result = _mm256_set_ps(i, r, i, r, i, r, i, r);
return Packet4cf(result);
}
@@ -108,15 +106,15 @@ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Pack
}
template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
+ __m128 low = _mm256_extractf128_ps(a.v, 0);
+ __m128 high = _mm256_extractf128_ps(a.v, 1);
+ __m128d lowd = _mm_castps_pd(low);
+ __m128d highd = _mm_castps_pd(high);
+ low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
+ high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
__m256 result;
- result[0] = a.v[6];
- result[1] = a.v[7];
- result[2] = a.v[4];
- result[3] = a.v[5];
- result[4] = a.v[2];
- result[5] = a.v[3];
- result[6] = a.v[0];
- result[7] = a.v[1];
+ result = _mm256_insertf128_ps(result, low, 1);
+ result = _mm256_insertf128_ps(result, high, 0);
return Packet4cf(result);
}
@@ -298,13 +296,11 @@ template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<do
template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
-template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
+template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
{
- __m256d result;
- for (int i = 0; i < 4; i+=2) {
- result[i] = std::real(from);
- result[i+1] = std::imag(from);
- }
+ const double r = std::real(from);
+ const double i = std::imag(from);
+ const __m256d result = _mm256_set_pd(i, r, i, r);
return Packet2cd(result);
}
@@ -321,11 +317,7 @@ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Pac
}
template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
- __m256d result;
- result[0] = a.v[2];
- result[1] = a.v[3];
- result[2] = a.v[0];
- result[3] = a.v[1];
+ __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
return Packet2cd(result);
}