aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr>, 2015-11-04 18:26:38 +0100
committer: Gael Guennebaud <g.gael@free.fr>, 2015-11-04 18:26:38 +0100
commit: 3dd24bdf9900beaf403f12db39992efd76e200d5 (patch)
tree: fb85b32ee609b8859cb4b8333cd1eb97f7119446 /Eigen
parent: 902750826b52de97f2cd48d91fcf4f70d874e93c (diff)
parent: 38832e079112c74c81338fe288c9f58273e517f8 (diff)
Merged in aavenel/eigen (pull request PR-142)
Add round, ceil and floor for SSE4.1/AVX (Bug #70)
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/src/Core/arch/AVX/PacketMath.h | 19
-rwxr-xr-x Eigen/src/Core/arch/SSE/PacketMath.h | 27
2 files changed, 42 insertions, 4 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index dfdb71abd..b313fb09a 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -66,7 +66,10 @@ template<> struct packet_traits<float> : default_packet_traits
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
- HasBlend = 1
+ HasBlend = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
};
};
template<> struct packet_traits<double> : default_packet_traits
@@ -83,7 +86,10 @@ template<> struct packet_traits<double> : default_packet_traits
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
- HasBlend = 1
+ HasBlend = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
};
};
@@ -176,6 +182,15 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
+// Full-width AVX rounding ops: each specialization returns the same 256-bit packet type it takes (Packet8f / Packet4d). pround passes rounding-control immediate 0 (round to nearest).
+template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, 0); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 3c30b2cf0..3fcb1c138 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -109,7 +109,13 @@ template<> struct packet_traits<float> : default_packet_traits
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasBlend = 1
+ // The SSE4.1-only enumerators carry their own leading comma, so the list never ends in a trailing comma (rejected by pedantic C++03) when SSE4.1 is off.
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ , HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+#endif
};
};
template<> struct packet_traits<double> : default_packet_traits
@@ -126,7 +132,13 @@ template<> struct packet_traits<double> : default_packet_traits
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasBlend = 1
+ // The SSE4.1-only enumerators carry their own leading comma, so the list never ends in a trailing comma (rejected by pedantic C++03) when SSE4.1 is off.
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ , HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+#endif
};
};
#endif
@@ -256,6 +268,17 @@ template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const
#endif
}
+#ifdef EIGEN_VECTORIZE_SSE4_1 // pround/pceil/pfloor below are compiled only when this macro is defined
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
+// NOTE(review): the immediate 0 passed to _mm_round_ps/_mm_round_pd above presumably selects round-to-nearest; confirm tie handling against the intrinsics reference.
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
+#endif // EIGEN_VECTORIZE_SSE4_1
+
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }