about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch
diff options
context:
space:
mode:
author: Konstantinos Margaritis <markos@freevec.org> 2014-09-21 08:12:22 +0000
committer: Konstantinos Margaritis <markos@freevec.org> 2014-09-21 08:12:22 +0000
commit: c0205ca4af402eadcf1a21bc0949afa4c9d9712a (patch)
tree: e5b64700e8d5248f95b17e8f5228c4f0af0eb8fd /Eigen/src/Core/arch
parent: 10f8aabb611e7b727ff67141b030689c0a53ca78 (diff)
VSX supports vec_div, implement where appropriate (float/doubles)
Diffstat (limited to 'Eigen/src/Core/arch')
-rwxr-xr-x Eigen/src/Core/arch/AltiVec/PacketMath.h 22
1 files changed, 6 insertions, 16 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index f319f4266..3555c521d 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -336,6 +336,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const
*/
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{
+#ifndef __VSX__ // VSX actually provides a div instruction
Packet4f t, y_0, y_1, res;
// Altivec does not offer a divide instruction, we have to do a reciprocal approximation
@@ -345,8 +346,10 @@ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const
t = vec_nmsub(y_0, b, p4f_ONE);
y_1 = vec_madd(y_0, t, y_0);
- res = vec_madd(a, y_1, p4f_ZERO);
- return res;
+ return vec_madd(a, y_1, p4f_ZERO);
+#else
+ return vec_div(a, b);
+#endif
}
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
@@ -801,20 +804,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return psub
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_ZERO); }
-template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b)
-{
- Packet2d t, y_0, y_1, res;
-
- // Altivec does not offer a divide instruction, we have to do a reciprocal approximation
- y_0 = vec_re(b);
-
- // Do one Newton-Raphson iteration to get the needed accuracy
- t = vec_nmsub(y_0, b, p2d_ONE);
- y_1 = vec_madd(y_0, t, y_0);
-
- res = vec_madd(a, y_1, p2d_ZERO);
- return res;
-}
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }
// for some weird reasons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }