diff options
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/AVX/Complex.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/Complex.h | 15 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 34 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/Complex.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/Complex.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 12 |
8 files changed, 48 insertions, 45 deletions
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index 9ced85132..aa5aa1e34 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -92,7 +92,7 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, DenseIndex stride) { return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]), std::imag(from[2*stride]), std::real(from[2*stride]), @@ -100,7 +100,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packe std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, DenseIndex stride) { __m128 low = _mm256_extractf128_ps(from.v, 0); to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)), @@ -310,13 +310,13 @@ template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, DenseIndex stride) { return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]), std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, DenseIndex stride) { __m128d low = _mm256_extractf128_pd(from.v, 0); to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1))); diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 74d3746d9..170302f7f 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -226,17 +226,17 @@ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& // NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available // NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4); -template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, DenseIndex stride) { return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride], from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, DenseIndex stride) { return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, DenseIndex stride) { __m128 low = _mm256_extractf128_ps(from, 0); to[stride*0] = _mm_cvtss_f32(low); @@ -250,7 +250,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, co to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)); to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, DenseIndex stride) { __m128d low = _mm256_extractf128_pd(from, 0); to[stride*0] = _mm_cvtsd_f64(low); diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 5409ddedd..13b874d0c 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -16,13 +16,14 @@ namespace internal { static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; -static Packet16uc p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; +static Packet16uc p16uc_COMPLEX_IM = vec_sld(p16uc_DUPLICATE, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; -static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; -static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; -static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; -static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; +static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; +static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; +static Packet16uc p16uc_COMPLEX_MASK16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);//{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; +static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = vec_add(p16uc_PSET_HI, p16uc_COMPLEX_MASK16);//{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; +static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = vec_add(p16uc_PSET_LO, p16uc_COMPLEX_MASK16);//{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; //---------- float ---------- struct Packet2cf @@ -68,14 +69,14 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo return res; } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride) { std::complex<float> EIGEN_ALIGN16 af[2]; af[0] = from[0*stride]; af[1] = from[1*stride]; return Packet2cf(vec_ld(0, (const float*)af)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride) { std::complex<float> EIGEN_ALIGN16 af[2]; vec_st(from.v, 0, (float*)af); diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 0e9adf450..b43e8ace3 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Konstantinos Margaritis <markos@codex.gr> +// Copyright (C) 2008-2014 Konstantinos Margaritis <markos@freevec.org> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -62,17 +62,17 @@ typedef __vector unsigned char Packet16uc; // Define global static constants: static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 }; static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; -static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; -static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); -static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7}; - -static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); -static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); -static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); -static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); -static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); -static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); -static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); +static Packet16uc p16uc_REVERSE = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; +static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); //{ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15} +static Packet16uc p16uc_DUPLICATE = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7}; + +static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0} +static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} +static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1} +static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16} +static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1} +static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0} +static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000} template<> struct packet_traits<float> : default_packet_traits { @@ -82,8 +82,10 @@ template<> struct packet_traits<float> : default_packet_traits Vectorizable = 1, AlignedOnScalar = 1, size=4, + HasHalfPacket=0, // FIXME check the Has* + HasDiv = 1, HasSin = 0, HasCos = 0, HasLog = 0, @@ -190,7 +192,7 @@ pbroadcast4<Packet4i>(const int *a, a3 = vec_splat(a3, 3); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride) { float EIGEN_ALIGN16 af[4]; af[0] = from[0*stride]; @@ -199,7 +201,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa af[3] = from[3*stride]; return vec_ld(0, af); } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride) { int EIGEN_ALIGN16 ai[4]; ai[0] = from[0*stride]; @@ -208,7 +210,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f ai[3] = from[3*stride]; return vec_ld(0, ai); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride) { float EIGEN_ALIGN16 af[4]; vec_st(from, 0, af); @@ -217,7 +219,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, co to[2*stride] = af[2]; to[3*stride] = af[3]; } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride) { int EIGEN_ALIGN16 ai[4]; vec_st(from, 0, ai); diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 259f2e7b8..42e7733d7 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -111,7 +111,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride) { Packet4f res; res = vsetq_lane_f32(std::real(from[0*stride]), res, 0); @@ -121,7 +121,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packe return Packet2cf(res); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride) { to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1)); to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3)); diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index e5eb06f36..380b76ae9 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -222,7 +222,7 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride) { Packet4f res; res = vsetq_lane_f32(from[0*stride], res, 0); @@ -231,7 +231,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa res = vsetq_lane_f32(from[3*stride], res, 3); return res; } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride) { Packet4i res; res = vsetq_lane_s32(from[0*stride], res, 0); @@ -241,14 +241,14 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f return res; } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride) { to[stride*0] = vgetq_lane_f32(from, 0); to[stride*1] = vgetq_lane_f32(from, 1); to[stride*2] = vgetq_lane_f32(from, 2); to[stride*3] = vgetq_lane_f32(from, 3); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride) { to[stride*0] = vgetq_lane_s32(from, 0); to[stride*1] = vgetq_lane_s32(from, 1); diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 0bc03cf9e..565e448fe 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -115,13 +115,13 @@ template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<f template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); } -template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, DenseIndex stride) { return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]), std::imag(from[0*stride]), std::real(from[0*stride]))); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, DenseIndex stride) { to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)), _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1))); diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 1124b24df..6923c88ec 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -387,32 +387,32 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castps_pd(from))); } template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castsi128_pd(from))); } -template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, DenseIndex stride) { return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride) { return _mm_set_pd(from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride) +template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride) { return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride) { to[stride*0] = _mm_cvtss_f32(from); to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1)); to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2)); to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride) { to[stride*0] = _mm_cvtsd_f64(from); to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1)); } -template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride) +template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride) { to[stride*0] = _mm_cvtsi128_si32(from); to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1)); |