diff options
author | 2014-07-10 22:04:45 +0200 | |
---|---|---|
committer | 2014-07-10 22:04:45 +0200 | |
commit | 296cb4016124d5c186ed65637888bb1c2c5fda2f (patch) | |
tree | f232c9aa62f8c581c7348226dc732d45d397a818 /Eigen/src/Core/arch/AVX/PacketMath.h | |
parent | 61b88d2feb8f23d1ba122f2c9a73abb183ebb25d (diff) | |
parent | d1460d92782ce0f7b90a8a52d44d50b44b167f98 (diff) |
merge with default branch
Diffstat (limited to 'Eigen/src/Core/arch/AVX/PacketMath.h')
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 8b8307d75..66b97bd69 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -224,17 +224,17 @@ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& // NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available // NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4); -template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, DenseIndex stride) { return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride], from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, DenseIndex stride) { return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, DenseIndex stride) { __m128 low = _mm256_extractf128_ps(from, 0); to[stride*0] = _mm_cvtss_f32(low); @@ -248,7 +248,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, co to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)); to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, DenseIndex stride) { __m128d low = _mm256_extractf128_pd(from, 0); to[stride*0] = _mm_cvtsd_f64(low); |