about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Gael Guennebaud <g.gael@free.fr> 2018-11-30 16:21:33 +0100
committer Gael Guennebaud <g.gael@free.fr> 2018-11-30 16:21:33 +0100
commit c785464430bfc697debe3f8d49e49064aa08e0a3 (patch)
tree a1954d2716532cb9916d0a0b195ba353f20a89b5
parent 69ace742be6f00f4280d312e046b0b1422fd112c (diff)
Add packet sin and cos to Altivec/VSX and NEON
-rw-r--r-- Eigen/src/Core/arch/AltiVec/MathFunctions.h 12
-rwxr-xr-x Eigen/src/Core/arch/AltiVec/PacketMath.h 5
-rw-r--r-- Eigen/src/Core/arch/NEON/MathFunctions.h 20
-rw-r--r-- Eigen/src/Core/arch/NEON/PacketMath.h 6
4 files changed, 35 insertions, 8 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
index acf665018..81097e668 100644
--- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h
+++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
@@ -30,6 +30,18 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
return pexp_float(_x);
}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psin<Packet4f>(const Packet4f& _x)
+{
+ return psin_float(_x);
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pcos<Packet4f>(const Packet4f& _x)
+{
+ return pcos_float(_x);
+}
+
#ifndef EIGEN_COMP_CLANG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f prsqrt<Packet4f>(const Packet4f& x)
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 446065fb7..d0ee93f4a 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -146,8 +146,8 @@ template<> struct packet_traits<float> : default_packet_traits
HasMin = 1,
HasMax = 1,
HasAbs = 1,
- HasSin = 0,
- HasCos = 0,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = EIGEN_FAST_MATH,
HasLog = 1,
HasExp = 1,
#ifdef __VSX__
@@ -437,6 +437,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const
Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b));
return vec_nor(c,c);
}
+template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmpeq(a,b)); }
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
diff --git a/Eigen/src/Core/arch/NEON/MathFunctions.h b/Eigen/src/Core/arch/NEON/MathFunctions.h
index addaacb9a..2e7d0e944 100644
--- a/Eigen/src/Core/arch/NEON/MathFunctions.h
+++ b/Eigen/src/Core/arch/NEON/MathFunctions.h
@@ -15,15 +15,27 @@ namespace Eigen {
namespace internal {
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
-Packet4f pexp<Packet4f>(const Packet4f& _x)
+Packet4f pexp<Packet4f>(const Packet4f& x)
{
- return pexp_float(_x);
+ return pexp_float(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
-Packet4f plog<Packet4f>(const Packet4f& _x)
+Packet4f plog<Packet4f>(const Packet4f& x)
{
- return plog_float(_x);
+ return plog_float(x);
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psin<Packet4f>(const Packet4f& x)
+{
+ return psin_float(x);
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pcos<Packet4f>(const Packet4f& x)
+{
+ return pcos_float(x);
}
} // end namespace internal
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 0a50153c7..ed3cec88a 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -111,8 +111,8 @@ template<> struct packet_traits<float> : default_packet_traits
HasDiv = 1,
HasFloor = 1,
// FIXME check the Has*
- HasSin = 0,
- HasCos = 0,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = EIGEN_FAST_MATH,
HasLog = 1,
HasExp = 1,
HasSqrt = 0
@@ -268,6 +268,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vceqq_f32(a,b)); }
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vmvnq_u32(vcgeq_f32(a,b))); }
+template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return vreinterpretq_s32_u32(vceqq_s32(a,b)); }
+
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
{
const Packet4f cst_1 = pset1<Packet4f>(1.0f);