diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/Sk4x.h | 3 | ||||
-rw-r--r-- | src/core/Sk4x_portable.h | 16 | ||||
-rw-r--r-- | src/core/Sk4x_sse.h | 3 |
3 files changed, 22 insertions, 0 deletions
diff --git a/src/core/Sk4x.h b/src/core/Sk4x.h index 058c400380..b01b6f1648 100644 --- a/src/core/Sk4x.h +++ b/src/core/Sk4x.h @@ -50,6 +50,9 @@ public: Sk4x multiply(const Sk4x&) const; Sk4x divide(const Sk4x&) const; + Sk4x rsqrt() const; // Approximate reciprocal sqrt(). + Sk4x sqrt() const; // this->multiply(this->rsqrt()) may be faster, but less precise. + Sk4i equal(const Sk4x&) const; Sk4i notEqual(const Sk4x&) const; Sk4i lessThan(const Sk4x&) const; diff --git a/src/core/Sk4x_portable.h b/src/core/Sk4x_portable.h index bd056c777e..440e91faaa 100644 --- a/src/core/Sk4x_portable.h +++ b/src/core/Sk4x_portable.h @@ -2,6 +2,8 @@ // This file will be intentionally included three times. #if defined(SK4X_PREAMBLE) + #include "SkFloatingPoint.h" + #include <math.h> #elif defined(SK4X_PRIVATE) typedef T Type; @@ -60,6 +62,20 @@ M(Sk4x<T>) multiply(const Sk4x<T>& other) const { return Sk4x(BINOP(*)); } M(Sk4x<T>) divide(const Sk4x<T>& other) const { return Sk4x(BINOP(/)); } #undef BINOP +template<> inline Sk4f Sk4f::rsqrt() const { + return Sk4f(sk_float_rsqrt(fVec[0]), + sk_float_rsqrt(fVec[1]), + sk_float_rsqrt(fVec[2]), + sk_float_rsqrt(fVec[3])); +} + +template<> inline Sk4f Sk4f::sqrt() const { + return Sk4f(sqrtf(fVec[0]), + sqrtf(fVec[1]), + sqrtf(fVec[2]), + sqrtf(fVec[3])); +} + #define BOOL_BINOP(op) fVec[0] op other.fVec[0] ? -1 : 0, \ fVec[1] op other.fVec[1] ? -1 : 0, \ fVec[2] op other.fVec[2] ? -1 : 0, \ diff --git a/src/core/Sk4x_sse.h b/src/core/Sk4x_sse.h index ee09f77449..6077d0273d 100644 --- a/src/core/Sk4x_sse.h +++ b/src/core/Sk4x_sse.h @@ -99,6 +99,9 @@ M(Sk4f) subtract(const Sk4f& o) const { return _mm_sub_ps(fVec, o.fVec); } M(Sk4f) multiply(const Sk4f& o) const { return _mm_mul_ps(fVec, o.fVec); } M(Sk4f) divide (const Sk4f& o) const { return _mm_div_ps(fVec, o.fVec); } +M(Sk4f) rsqrt() const { return _mm_rsqrt_ps(fVec); } +M(Sk4f) sqrt() const { return _mm_sqrt_ps( fVec); } + M(Sk4i) equal (const Sk4f& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } M(Sk4i) notEqual (const Sk4f& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } M(Sk4i) lessThan (const Sk4f& o) const { return _mm_cmplt_ps (fVec, o.fVec); } |