/* * Copyright 2015 Google Inc. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #ifndef SkNx_DEFINED #define SkNx_DEFINED //#define SKNX_NO_SIMD #include "SkScalar.h" #include "SkTypes.h" #include #include #include #define SI static inline // The default SkNx just proxies down to a pair of SkNx. template struct SkNx { typedef SkNx Half; Half fLo, fHi; SkNx() = default; SkNx(const Half& lo, const Half& hi) : fLo(lo), fHi(hi) {} SkNx(T v) : fLo(v), fHi(v) {} SkNx(T a, T b) : fLo(a) , fHi(b) { static_assert(N==2, ""); } SkNx(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) { static_assert(N==4, ""); } SkNx(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) { static_assert(N==8, ""); } SkNx(T a, T b, T c, T d, T e, T f, T g, T h, T i, T j, T k, T l, T m, T n, T o, T p) : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { static_assert(N==16, ""); } T operator[](int k) const { SkASSERT(0 <= k && k < N); return k < N/2 ? fLo[k] : fHi[k-N/2]; } static SkNx Load(const void* vptr) { auto ptr = (const char*)vptr; return { Half::Load(ptr), Half::Load(ptr + N/2*sizeof(T)) }; } void store(void* vptr) const { auto ptr = (char*)vptr; fLo.store(ptr); fHi.store(ptr + N/2*sizeof(T)); } bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } SkNx abs() const { return { fLo. abs(), fHi. abs() }; } SkNx sqrt() const { return { fLo. sqrt(), fHi. sqrt() }; } SkNx rsqrt() const { return { fLo. rsqrt(), fHi. rsqrt() }; } SkNx floor() const { return { fLo. floor(), fHi. floor() }; } SkNx invert() const { return { fLo.invert(), fHi.invert() }; } SkNx operator!() const { return { !fLo, !fHi }; } SkNx operator-() const { return { -fLo, -fHi }; } SkNx operator~() const { return { ~fLo, ~fHi }; } SkNx operator<<(int bits) const { return { fLo << bits, fHi << bits }; } SkNx operator>>(int bits) const { return { fLo >> bits, fHi >> bits }; } SkNx operator+(const SkNx& y) const { return { fLo + y.fLo, fHi + y.fHi }; } SkNx operator-(const SkNx& y) const { return { fLo - y.fLo, fHi - y.fHi }; } SkNx operator*(const SkNx& y) const { return { fLo * y.fLo, fHi * y.fHi }; } SkNx operator/(const SkNx& y) const { return { fLo / y.fLo, fHi / y.fHi }; } SkNx operator&(const SkNx& y) const { return { fLo & y.fLo, fHi & y.fHi }; } SkNx operator|(const SkNx& y) const { return { fLo | y.fLo, fHi | y.fHi }; } SkNx operator^(const SkNx& y) const { return { fLo ^ y.fLo, fHi ^ y.fHi }; } SkNx operator==(const SkNx& y) const { return { fLo == y.fLo, fHi == y.fHi }; } SkNx operator!=(const SkNx& y) const { return { fLo != y.fLo, fHi != y.fHi }; } SkNx operator<=(const SkNx& y) const { return { fLo <= y.fLo, fHi <= y.fHi }; } SkNx operator>=(const SkNx& y) const { return { fLo >= y.fLo, fHi >= y.fHi }; } SkNx operator< (const SkNx& y) const { return { fLo < y.fLo, fHi < y.fHi }; } SkNx operator> (const SkNx& y) const { return { fLo > y.fLo, fHi > y.fHi }; } SkNx saturatedAdd(const SkNx& y) const { return { fLo.saturatedAdd(y.fLo), fHi.saturatedAdd(y.fHi) }; } SkNx thenElse(const SkNx& t, const SkNx& e) const { return { fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi) }; } static SkNx Min(const SkNx& x, const SkNx& y) { return { Half::Min(x.fLo, y.fLo), Half::Min(x.fHi, y.fHi) }; } static SkNx Max(const SkNx& x, const SkNx& y) { return { Half::Max(x.fLo, y.fLo), Half::Max(x.fHi, y.fHi) }; } }; // The N -> N/2 recursion bottoms out at N == 1, a scalar value. template struct SkNx<1,T> { T fVal; SkNx() = default; SkNx(T v) : fVal(v) {} // Android complains against unused parameters, so we guard it T operator[](int SkDEBUGCODE(k)) const { SkASSERT(k == 0); return fVal; } static SkNx Load(const void* ptr) { SkNx v; memcpy(&v, ptr, sizeof(T)); return v; } void store(void* ptr) const { memcpy(ptr, &fVal, sizeof(T)); } bool anyTrue() const { return fVal != 0; } bool allTrue() const { return fVal != 0; } SkNx abs() const { return Abs(fVal); } SkNx sqrt() const { return Sqrt(fVal); } SkNx rsqrt() const { return T(1) / this->sqrt(); } SkNx floor() const { return Floor(fVal); } SkNx invert() const { return T(1) / *this; } SkNx operator!() const { return !fVal; } SkNx operator-() const { return -fVal; } SkNx operator~() const { return FromBits(~ToBits(fVal)); } SkNx operator<<(int bits) const { return fVal << bits; } SkNx operator>>(int bits) const { return fVal >> bits; } SkNx operator+(const SkNx& y) const { return fVal + y.fVal; } SkNx operator-(const SkNx& y) const { return fVal - y.fVal; } SkNx operator*(const SkNx& y) const { return fVal * y.fVal; } SkNx operator/(const SkNx& y) const { return fVal / y.fVal; } SkNx operator&(const SkNx& y) const { return FromBits(ToBits(fVal) & ToBits(y.fVal)); } SkNx operator|(const SkNx& y) const { return FromBits(ToBits(fVal) | ToBits(y.fVal)); } SkNx operator^(const SkNx& y) const { return FromBits(ToBits(fVal) ^ ToBits(y.fVal)); } SkNx operator==(const SkNx& y) const { return FromBits(fVal == y.fVal ? ~0 : 0); } SkNx operator!=(const SkNx& y) const { return FromBits(fVal != y.fVal ? ~0 : 0); } SkNx operator<=(const SkNx& y) const { return FromBits(fVal <= y.fVal ? ~0 : 0); } SkNx operator>=(const SkNx& y) const { return FromBits(fVal >= y.fVal ? ~0 : 0); } SkNx operator< (const SkNx& y) const { return FromBits(fVal < y.fVal ? ~0 : 0); } SkNx operator> (const SkNx& y) const { return FromBits(fVal > y.fVal ? ~0 : 0); } static SkNx Min(const SkNx& x, const SkNx& y) { return x.fVal < y.fVal ? x : y; } static SkNx Max(const SkNx& x, const SkNx& y) { return x.fVal > y.fVal ? x : y; } SkNx saturatedAdd(const SkNx& y) const { static_assert(std::is_unsigned::value, ""); T sum = fVal + y.fVal; return sum < fVal ? std::numeric_limits::max() : sum; } SkNx thenElse(const SkNx& t, const SkNx& e) const { return fVal != 0 ? t : e; } private: // Helper functions to choose the right float/double methods. (In madness lies...) static float Abs(float val) { return ::fabsf(val); } static float Sqrt(float val) { return ::sqrtf(val); } static float Floor(float val) { return ::floorf(val); } static double Abs(double val) { return ::fabs(val); } static double Sqrt(double val) { return ::sqrt(val); } static double Floor(double val) { return ::floor(val); } // Helper functions for working with floats/doubles as bit patterns. template static U ToBits(U v) { return v; } static int32_t ToBits(float v) { int32_t bits; memcpy(&bits, &v, sizeof(v)); return bits; } static int64_t ToBits(double v) { int64_t bits; memcpy(&bits, &v, sizeof(v)); return bits; } template static T FromBits(Bits bits) { static_assert(std::is_pod::value && std::is_pod::value && sizeof(T) <= sizeof(Bits), ""); T val; memcpy(&val, &bits, sizeof(T)); return val; } }; // Allow scalars on the left or right of binary operators, and things like +=, &=, etc. #define V template SI SkNx V operator+ (T x, const SkNx& y) { return SkNx(x) + y; } V operator- (T x, const SkNx& y) { return SkNx(x) - y; } V operator* (T x, const SkNx& y) { return SkNx(x) * y; } V operator/ (T x, const SkNx& y) { return SkNx(x) / y; } V operator& (T x, const SkNx& y) { return SkNx(x) & y; } V operator| (T x, const SkNx& y) { return SkNx(x) | y; } V operator^ (T x, const SkNx& y) { return SkNx(x) ^ y; } V operator==(T x, const SkNx& y) { return SkNx(x) == y; } V operator!=(T x, const SkNx& y) { return SkNx(x) != y; } V operator<=(T x, const SkNx& y) { return SkNx(x) <= y; } V operator>=(T x, const SkNx& y) { return SkNx(x) >= y; } V operator< (T x, const SkNx& y) { return SkNx(x) < y; } V operator> (T x, const SkNx& y) { return SkNx(x) > y; } V operator+ (const SkNx& x, T y) { return x + SkNx(y); } V operator- (const SkNx& x, T y) { return x - SkNx(y); } V operator* (const SkNx& x, T y) { return x * SkNx(y); } V operator/ (const SkNx& x, T y) { return x / SkNx(y); } V operator& (const SkNx& x, T y) { return x & SkNx(y); } V operator| (const SkNx& x, T y) { return x | SkNx(y); } V operator^ (const SkNx& x, T y) { return x ^ SkNx(y); } V operator==(const SkNx& x, T y) { return x == SkNx(y); } V operator!=(const SkNx& x, T y) { return x != SkNx(y); } V operator<=(const SkNx& x, T y) { return x <= SkNx(y); } V operator>=(const SkNx& x, T y) { return x >= SkNx(y); } V operator< (const SkNx& x, T y) { return x < SkNx(y); } V operator> (const SkNx& x, T y) { return x > SkNx(y); } V& operator<<=(SkNx& x, int bits) { return (x = x << bits); } V& operator>>=(SkNx& x, int bits) { return (x = x >> bits); } V& operator +=(SkNx& x, const SkNx& y) { return (x = x + y); } V& operator -=(SkNx& x, const SkNx& y) { return (x = x - y); } V& operator *=(SkNx& x, const SkNx& y) { return (x = x * y); } V& operator /=(SkNx& x, const SkNx& y) { return (x = x / y); } V& operator &=(SkNx& x, const SkNx& y) { return (x = x & y); } V& operator |=(SkNx& x, const SkNx& y) { return (x = x | y); } V& operator ^=(SkNx& x, const SkNx& y) { return (x = x ^ y); } V& operator +=(SkNx& x, T y) { return (x = x + SkNx(y)); } V& operator -=(SkNx& x, T y) { return (x = x - SkNx(y)); } V& operator *=(SkNx& x, T y) { return (x = x * SkNx(y)); } V& operator /=(SkNx& x, T y) { return (x = x / SkNx(y)); } V& operator &=(SkNx& x, T y) { return (x = x & SkNx(y)); } V& operator |=(SkNx& x, T y) { return (x = x | SkNx(y)); } V& operator ^=(SkNx& x, T y) { return (x = x ^ SkNx(y)); } #undef V // SkNx ~~> SkNx + SkNx template SI void SkNx_split(const SkNx& v, SkNx* lo, SkNx* hi) { *lo = v.fLo; *hi = v.fHi; } // SkNx + SkNx ~~> SkNx template SI SkNx SkNx_join(const SkNx& lo, const SkNx& hi) { return { lo, hi }; } // A very generic shuffle. Can reorder, duplicate, contract, expand... // Sk4f v = { R,G,B,A }; // SkNx_shuffle<2,1,0,3>(v) ~~> {B,G,R,A} // SkNx_shuffle<2,1>(v) ~~> {B,G} // SkNx_shuffle<2,1,2,1,2,1,2,1>(v) ~~> {B,G,B,G,B,G,B,G} // SkNx_shuffle<3,3,3,3>(v) ~~> {A,A,A,A} template SI SkNx SkNx_shuffle(const SkNx& v) { return { v[Ix]... }; } // Cast from SkNx to SkNx, as if you called static_cast(Src). template SI SkNx SkNx_cast(const SkNx& v) { return { SkNx_cast(v.fLo), SkNx_cast(v.fHi) }; } template SI SkNx<1,Dst> SkNx_cast(const SkNx<1,Src>& v) { return static_cast(v.fVal); } typedef SkNx<2, float> Sk2f; typedef SkNx<4, float> Sk4f; typedef SkNx<8, float> Sk8f; typedef SkNx<16, float> Sk16f; typedef SkNx<2, SkScalar> Sk2s; typedef SkNx<4, SkScalar> Sk4s; typedef SkNx<8, SkScalar> Sk8s; typedef SkNx<16, SkScalar> Sk16s; typedef SkNx<4, uint8_t> Sk4b; typedef SkNx<8, uint8_t> Sk8b; typedef SkNx<16, uint8_t> Sk16b; typedef SkNx<4, uint16_t> Sk4h; typedef SkNx<8, uint16_t> Sk8h; typedef SkNx<16, uint16_t> Sk16h; typedef SkNx<4, int32_t> Sk4i; typedef SkNx<4, uint32_t> Sk4u; // Include platform specific specializations if available. #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 #include "../opts/SkNx_sse.h" #elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON) #include "../opts/SkNx_neon.h" #else SI Sk4i Sk4f_round(const Sk4f& x) { return { (int) lrintf (x[0]), (int) lrintf (x[1]), (int) lrintf (x[2]), (int) lrintf (x[3]), }; } // Load 4 Sk4h and transpose them (256 bits total). SI void Sk4h_load4(const void* vptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h* a) { const uint64_t* ptr = (const uint64_t*)vptr; auto p0 = Sk4h::Load(ptr+0), p1 = Sk4h::Load(ptr+1), p2 = Sk4h::Load(ptr+2), p3 = Sk4h::Load(ptr+3); *r = { p0[0], p1[0], p2[0], p3[0] }; *g = { p0[1], p1[1], p2[1], p3[1] }; *b = { p0[2], p1[2], p2[2], p3[2] }; *a = { p0[3], p1[3], p2[3], p3[3] }; } // Transpose 4 Sk4h and store (256 bits total). SI void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, const Sk4h& a) { uint64_t* dst64 = (uint64_t*) dst; Sk4h(r[0], g[0], b[0], a[0]).store(dst64 + 0); Sk4h(r[1], g[1], b[1], a[1]).store(dst64 + 1); Sk4h(r[2], g[2], b[2], a[2]).store(dst64 + 2); Sk4h(r[3], g[3], b[3], a[3]).store(dst64 + 3); } // Load 4 Sk4f and transpose them (512 bits total). SI void Sk4f_load4(const void* vptr, Sk4f* r, Sk4f* g, Sk4f* b, Sk4f* a) { const float* ptr = (const float*) vptr; auto p0 = Sk4f::Load(ptr + 0), p1 = Sk4f::Load(ptr + 4), p2 = Sk4f::Load(ptr + 8), p3 = Sk4f::Load(ptr + 12); *r = { p0[0], p1[0], p2[0], p3[0] }; *g = { p0[1], p1[1], p2[1], p3[1] }; *b = { p0[2], p1[2], p2[2], p3[2] }; *a = { p0[3], p1[3], p2[3], p3[3] }; } // Transpose 4 Sk4f and store (512 bits total). SI void Sk4f_store4(void* vdst, const Sk4f& r, const Sk4f& g, const Sk4f& b, const Sk4f& a) { float* dst = (float*) vdst; Sk4f(r[0], g[0], b[0], a[0]).store(dst + 0); Sk4f(r[1], g[1], b[1], a[1]).store(dst + 4); Sk4f(r[2], g[2], b[2], a[2]).store(dst + 8); Sk4f(r[3], g[3], b[3], a[3]).store(dst + 12); } #endif SI void Sk4f_ToBytes(uint8_t p[16], const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) { SkNx_cast(SkNx_join(SkNx_join(a,b), SkNx_join(c,d))).store(p); } #undef SI #endif//SkNx_DEFINED