diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-11-26 15:58:11 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-11-26 15:58:11 +0100 |
commit | c2f35b1b4763348fd0a6df2ce750a7d3d3a56d79 (patch) | |
tree | 86dd64dfd2135d7b7e2f8098197f2b313647cb42 /Eigen | |
parent | c24e98e6a83dbd9cb305941a144fa1f4b21c6437 (diff) |
Unify Altivec/VSX's plog with generic implementation, and enable it!
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/MathFunctions.h | 61 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 28 |
2 files changed, 31 insertions, 58 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index c5e4bede7..8f3296253 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -9,13 +9,15 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -/* The sin, cos, exp, and log functions of this file come from +/* The sin, cos, and exp functions of this file come from * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ */ #ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H #define EIGEN_MATH_FUNCTIONS_ALTIVEC_H +#include "../Default/GenericPacketMathFunctions.h" + namespace Eigen { namespace internal { @@ -94,62 +96,7 @@ static Packet2ul p2ul_52 = { 52, 52 }; template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f plog<Packet4f>(const Packet4f& _x) { - Packet4f x = _x; - - Packet4i emm0; - - /* isvalid_mask is 0 if x < 0 or x is NaN. */ - Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO)); - Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO)); - - x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */ - emm0 = vec_sr(reinterpret_cast<Packet4i>(x), - reinterpret_cast<Packet4ui>(p4i_23)); - - /* keep only the fractional part */ - x = pand(x, p4f_inv_mant_mask); - x = por(x, p4f_half); - - emm0 = psub(emm0, p4i_0x7f); - Packet4f e = padd(vec_ctf(emm0, 0), p4f_1); - - /* part2: - if( x < SQRTHF ) { - e -= 1; - x = x + x - 1.0; - } else { x = x - 1.0; } - */ - Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF)); - Packet4f tmp = pand(x, mask); - x = psub(x, p4f_1); - e = psub(e, pand(p4f_1, mask)); - x = padd(x, tmp); - - Packet4f x2 = pmul(x,x); - Packet4f x3 = pmul(x2,x); - - Packet4f y, y1, y2; - y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1); - y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4); - y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7); - y = pmadd(y , x, p4f_cephes_log_p2); - y1 = pmadd(y1, x, p4f_cephes_log_p5); - y2 = pmadd(y2, x, p4f_cephes_log_p8); - y = pmadd(y, x3, y1); - y = pmadd(y, x3, y2); - y = pmul(y, x3); - - y1 = pmul(e, p4f_cephes_log_q1); - tmp = pmul(x2, p4f_half); - y = padd(y, y1); - x = psub(x, tmp); - y2 = pmul(e, p4f_cephes_log_q2); - x = padd(x, y); - x = padd(x, y2); - // negative arg will be NAN, 0 will be -INF - x = vec_sel(x, p4f_minus_inf, iszero_mask); - x = vec_sel(p4f_minus_nan, x, isvalid_mask); - return x; + return plog_float(_x); } template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 7f4e90f75..867aa8494 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -148,7 +148,7 @@ template<> struct packet_traits<float> : default_packet_traits HasAbs = 1, HasSin = 0, HasCos = 0, - HasLog = 0, + HasLog = 1, HasExp = 1, #ifdef __VSX__ HasSqrt = 1, @@ -285,6 +285,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { Packet4i v = {from, from, from, from}; return v; } + +template<> EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(unsigned int from) { + return reinterpret_cast<Packet4f>(pset1<Packet4i>(from)); +} + template<> EIGEN_STRONG_INLINE void pbroadcast4<Packet4f>(const float *a, Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3) @@ -414,6 +419,14 @@ template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const } template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmple(a,b)); } +template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmplt(a,b)); } +template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmpeq(a,b)); } +template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { + Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b)); + return vec_nor(c,c); +} + template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } @@ -426,6 +439,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } +template<> EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) { + return vec_sel(b, a, mask); +} + template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); } template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); } template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); } @@ -550,6 +567,15 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } +template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) { + return vec_ctf(vec_sr(reinterpret_cast<Packet4i>(a), + reinterpret_cast<Packet4ui>(pset1<Packet4i>(n))),0); +} + +template<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) { + return pfrexp_float(a,exponent); +} + template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) { Packet4f b, sum; |