diff options
author | Gael Guennebaud <g.gael@free.fr> | 2015-02-16 15:05:41 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2015-02-16 15:05:41 +0100 |
commit | 45cbb0bbb1f66bbc923dd4dd1034b919f6b4a191 (patch) | |
tree | a0bfd3bc07784adac9f48b092f7fc2651c16389a /Eigen/src/Core/arch | |
parent | cc641aabb710ab002b6c641bd5c3e1deed0e634d (diff) |
The usage of DenseIndex is deprecated, so let's replace DenseIndex by Index
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/AVX/Complex.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/Complex.h | 8 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/Complex.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 12 |
8 files changed, 36 insertions, 36 deletions
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index aa5aa1e34..003a1fc3c 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -92,7 +92,7 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, Index stride) { return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]), std::imag(from[2*stride]), std::real(from[2*stride]), @@ -100,7 +100,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packe std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, Index stride) { __m128 low = _mm256_extractf128_ps(from.v, 0); to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)), @@ -310,13 +310,13 @@ template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, Index stride) { return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]), std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, Index stride) { __m128d low = _mm256_extractf128_pd(from.v, 0); to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1))); diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index be66a502a..ff6cc6b56 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -226,17 +226,17 @@ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& // NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available // NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4); -template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride) { return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride], from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride) { return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride) { __m128 low = _mm256_extractf128_ps(from, 0); to[stride*0] = _mm_cvtss_f32(low); @@ -250,7 +250,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, co to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)); to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride) { __m128d low = _mm256_extractf128_pd(from, 0); to[stride*0] = _mm_cvtsd_f64(low); diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index f9b93a42b..565d2ece0 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -67,14 +67,14 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo return res; } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride) { std::complex<float> EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return Packet2cf(vec_ld(0, (const float*)af)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride) { std::complex<float> EIGEN_ALIGN16 af[2]; vec_st(from.v, 0, (float*)af); @@ -285,14 +285,14 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex< template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) { /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); } -template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride) { std::complex<double> EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return pload<Packet1cd>(af); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride) { std::complex<double> EIGEN_ALIGN16 af[2]; pstore<std::complex<double> >(af, from); diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 6b68fc7a5..d647427ce 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -252,7 +252,7 @@ pbroadcast4<Packet4i>(const int *a, a3 = vec_splat(a3, 3); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) { float EIGEN_ALIGN16 af[4]; af[0] = from[0*stride]; @@ -261,7 +261,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa af[3] = from[3*stride]; return pload<Packet4f>(af); } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) { int EIGEN_ALIGN16 ai[4]; ai[0] = from[0*stride]; @@ -270,7 +270,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f ai[3] = from[3*stride]; return pload<Packet4i>(ai); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) { float EIGEN_ALIGN16 af[4]; pstore<float>(af, from); @@ -279,7 +279,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, co to[2*stride] = af[2]; to[3*stride] = af[3]; } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) { int EIGEN_ALIGN16 ai[4]; pstore<int>((int *)ai, from); @@ -793,14 +793,14 @@ pbroadcast4<Packet2d>(const double *a, a2 = vec_splat_dbl(a3, 0); a3 = vec_splat_dbl(a3, 1); } -template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) { double EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return pload<Packet2d>(af); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) { double EIGEN_ALIGN16 af[2]; pstore<double>(af, from); diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 0fdcb0741..154daa7a7 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -112,7 +112,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride) { Packet4f res; res = vsetq_lane_f32(std::real(from[0*stride]), res, 0); @@ -122,7 +122,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packe return Packet2cf(res); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride) { to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1)); to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3)); @@ -363,7 +363,7 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex< template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); } -template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride) { Packet2d res; res = vsetq_lane_f64(std::real(from[0*stride]), res, 0); @@ -371,7 +371,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Pack return Packet1cd(res); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride) { to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1)); } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 559682cf7..8149aed7f 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -250,7 +250,7 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) { Packet4f res; res = vsetq_lane_f32(from[0*stride], res, 0); @@ -259,7 +259,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa res = vsetq_lane_f32(from[3*stride], res, 3); return res; } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) { Packet4i res; res = vsetq_lane_s32(from[0*stride], res, 0); @@ -269,14 +269,14 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f return res; } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) { to[stride*0] = vgetq_lane_f32(from, 0); to[stride*1] = vgetq_lane_f32(from, 1); to[stride*2] = vgetq_lane_f32(from, 2); to[stride*3] = vgetq_lane_f32(from, 3); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) { to[stride*0] = vgetq_lane_s32(from, 0); to[stride*1] = vgetq_lane_s32(from, 1); @@ -606,14 +606,14 @@ template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f64(to, from); } -template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) { Packet2d res; res = vsetq_lane_f64(from[0*stride], res, 0); res = vsetq_lane_f64(from[1*stride], res, 1); return res; } -template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) { to[stride*0] = vgetq_lane_f64(from, 0); to[stride*1] = vgetq_lane_f64(from, 1); diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 565e448fe..acb49abf8 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -115,13 +115,13 @@ template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<f template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride) { return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]), std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride) { to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)), _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1))); diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 898cb9ab0..86d94dffa 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -387,32 +387,32 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castps_pd(from))); } template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castsi128_pd(from))); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) { return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) { return _mm_set_pd(from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) { return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) { to[stride*0] = _mm_cvtss_f32(from); to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1)); to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2)); to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) { to[stride*0] = _mm_cvtsd_f64(from); to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) { to[stride*0] = _mm_cvtsi128_si32(from); to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1)); |