about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/AltiVec/PacketMath.h
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2018-04-04 13:10:38 +0200
committer: Gael Guennebaud <g.gael@free.fr> 2018-04-04 13:10:38 +0200
commit: 13f5df9f6763f2dc900b74df06a3b1e67bacdaaf (patch)
tree: d72585262de50ed8a3f0e6104230a302b9852ca3 /Eigen/src/Core/arch/AltiVec/PacketMath.h
parent: e91e314347c14774206307a91d1b427e49f9b3e2 (diff)
Add a note on vec_min vs asm
Diffstat (limited to 'Eigen/src/Core/arch/AltiVec/PacketMath.h')
-rwxr-xr-x Eigen/src/Core/arch/AltiVec/PacketMath.h | 4
1 files changed, 4 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index ad0c6df45..6d7190a56 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -391,6 +391,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i&
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#ifdef __VSX__
+ // NOTE: about 10% slower than vec_min, but consistent with std::min and SSE regarding NaN
Packet4f ret;
__asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
@@ -403,6 +404,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#ifdef __VSX__
+ // NOTE: about 10% slower than vec_max, but consistent with std::max and SSE regarding NaN
Packet4f ret;
__asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
@@ -930,6 +932,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d&
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)
{
+ // NOTE: about 10% slower than vec_min, but consistent with std::min and SSE regarding NaN
Packet2d ret;
__asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
@@ -937,6 +940,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)
{
+ // NOTE: about 10% slower than vec_max, but consistent with std::max and SSE regarding NaN
Packet2d ret;
__asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;