aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/AVX
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-08-13 22:25:29 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-08-13 22:25:29 -0700
commit16047c8d4a916baa200036c4d5501707b3552720 (patch)
treee8dc65e4de304a16247f71ca5f40c5194b1aad5e /Eigen/src/Core/arch/AVX
parent916ef48846b40f690f41583d288eb1c3c40db0a3 (diff)
parente51da9c3a8b448bc06110f1a7376211dcd32cc0e (diff)
Pulled in the latest changes from the Eigen trunk
Diffstat (limited to 'Eigen/src/Core/arch/AVX')
-rw-r--r--Eigen/src/Core/arch/AVX/Complex.h8
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h8
2 files changed, 8 insertions, 8 deletions
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 9ced85132..aa5aa1e34 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -92,7 +92,7 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
-template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, int stride)
+template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, DenseIndex stride)
{
return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),
std::imag(from[2*stride]), std::real(from[2*stride]),
@@ -100,7 +100,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packe
std::imag(from[0*stride]), std::real(from[0*stride])));
}
-template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, int stride)
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, DenseIndex stride)
{
__m128 low = _mm256_extractf128_ps(from.v, 0);
to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
@@ -310,13 +310,13 @@ template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
-template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, int stride)
+template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, DenseIndex stride)
{
return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),
std::imag(from[0*stride]), std::real(from[0*stride])));
}
-template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, int stride)
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, DenseIndex stride)
{
__m128d low = _mm256_extractf128_pd(from.v, 0);
to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 74d3746d9..170302f7f 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -226,17 +226,17 @@ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i&
// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available
// NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4);
-template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, int stride)
+template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, DenseIndex stride)
{
return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
-template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, int stride)
+template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, DenseIndex stride)
{
return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
}
-template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, int stride)
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, DenseIndex stride)
{
__m128 low = _mm256_extractf128_ps(from, 0);
to[stride*0] = _mm_cvtss_f32(low);
@@ -250,7 +250,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, co
to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
}
-template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, int stride)
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, DenseIndex stride)
{
__m128d low = _mm256_extractf128_pd(from, 0);
to[stride*0] = _mm_cvtsd_f64(low);