diff options
author | Gael Guennebaud <g.gael@free.fr> | 2019-01-14 17:57:28 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2019-01-14 17:57:28 +0100 |
commit | 3c9e6d206d6ba0fe5146a634d594469b1d76780f (patch) | |
tree | f6d00dbcac99dcd46350ce8afcc592c6c5713622 /Eigen/src/Core/arch/AVX512/Complex.h | |
parent | 61b6eb05fe150909a88273d205f392e1ec3307ff (diff) |
AVX512: fix pgather/pscatter for Packet4cd and unaligned pointers
Diffstat (limited to 'Eigen/src/Core/arch/AVX512/Complex.h')
-rw-r--r-- | Eigen/src/Core/arch/AVX512/Complex.h | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index f2034a713..7bb2fd630 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -308,18 +308,18 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex< template<> EIGEN_DEVICE_FUNC inline Packet4cd pgather<std::complex<double>, Packet4cd>(const std::complex<double>* from, Index stride) { return Packet4cd(_mm512_insertf64x4(_mm512_castpd256_pd512( - _mm256_insertf128_pd(_mm256_castpd128_pd256(pload<Packet1cd>(from+0*stride).v), pload<Packet1cd>(from+1*stride).v,1)), - _mm256_insertf128_pd(_mm256_castpd128_pd256(pload<Packet1cd>(from+2*stride).v), pload<Packet1cd>(from+3*stride).v,1), 1)); + _mm256_insertf128_pd(_mm256_castpd128_pd256(ploadu<Packet1cd>(from+0*stride).v), ploadu<Packet1cd>(from+1*stride).v,1)), + _mm256_insertf128_pd(_mm256_castpd128_pd256(ploadu<Packet1cd>(from+2*stride).v), ploadu<Packet1cd>(from+3*stride).v,1), 1)); } template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet4cd>(std::complex<double>* to, const Packet4cd& from, Index stride) { __m512i fromi = _mm512_castpd_si512(from.v); double* tod = (double*)(void*)to; - _mm_store_pd(tod+0*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,0)) ); - _mm_store_pd(tod+2*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,1)) ); - _mm_store_pd(tod+4*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,2)) ); - _mm_store_pd(tod+6*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,3)) ); + _mm_storeu_pd(tod+0*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,0)) ); + _mm_storeu_pd(tod+2*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,1)) ); + _mm_storeu_pd(tod+4*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,2)) ); + _mm_storeu_pd(tod+6*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,3)) ); } template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet4cd>(const Packet4cd& a) |