sknx refactoring

- trim unused specializations (Sk4i, Sk2d) and APIs (SkNx_dup) - expand APIs a little * v[0] == v.kth<0>() * SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f - remove anonymous namespace I believe it's safe to remove the anonymous namespace right now. We're worried about violating the One Definition Rule; the anonymous namespace protected us from that. In Release builds, this is mostly moot, as everything tends to inline completely. In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken. Now that we're building with SSE2/NEON everywhere, very few bots have even a chance of getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR. I plan to follow up with a tedious .kth<...>() -> [...] auto-replace. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002 CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review URL: https://codereview.chromium.org/1683543002
author: mtklein <mtklein@chromium.org> 2016-02-09 10:35:27 -0800
committer: Commit bot <commit-bot@chromium.org> 2016-02-09 10:35:28 -0800
commit: e4c0beed744d09dae4757c1893d8caa64ee09cd2 (patch)
tree: 0e35dcab1c2ab7a1b75609c6dd1dd11231a572eb /src/core/SkNx.h
parent: f1d415188ffb4c34e2886c2cfceb363a148333f1 (diff)
1 files changed, 88 insertions, 131 deletions
diff --git a/src/core/SkNx.h b/src/core/SkNx.h
index 7ae5d82976..69295d4fc3 100644
--- a/src/core/SkNx.h
+++ b/src/core/SkNx.h
@@ -8,20 +8,11 @@
 #ifndef SkNx_DEFINED
 #define SkNx_DEFINED
 
-
 //#define SKNX_NO_SIMD
 
 #include "SkScalar.h"
 #include "SkTypes.h"
 #include <math.h>
-#define REQUIRE(x) static_assert(x, #x)
-
-// This file may be included multiple times by .cpp files with different flags, leading
-// to different definitions.  Usually that doesn't matter because it's all inlined, but
-// in Debug modes the compilers may not inline everything.  So wrap everything in an
-// anonymous namespace to give each includer their own silo of this code (or the linker
-// will probably pick one randomly for us, which is rarely correct).
-namespace {
 
 // The default implementations just fall back on a pair of size N/2.
 // These support the union of operations we might do to ints and floats, but
@@ -30,68 +21,61 @@ template <int N, typename T>
 class SkNx {
 public:
     SkNx() {}
-    SkNx(const SkNx<N/2, T>& lo, const SkNx<N/2, T>& hi) : fLo(lo), fHi(hi) {}
     SkNx(T val) : fLo(val), fHi(val) {}
+
+    typedef SkNx<N/2, T> Half;
+    SkNx(const Half& lo, const Half& hi) : fLo(lo), fHi(hi) {}
+
+    SkNx(T a, T b)                                : fLo(a),       fHi(b)       {}
+    SkNx(T a, T b, T c, T d)                      : fLo(a,b),     fHi(c,d)     {}
+    SkNx(T a, T b, T c, T d,  T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) {}
+    SkNx(T a, T b, T c, T d,  T e, T f, T g, T h,
+         T i, T j, T k, T l,  T m, T n, T o, T p) : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) {}
+
     static SkNx Load(const void* ptr) {
         auto vals = (const T*)ptr;
-        return SkNx(SkNx<N/2,T>::Load(vals), SkNx<N/2,T>::Load(vals+N/2));
+        return SkNx(Half::Load(vals), Half::Load(vals+N/2));
     }
 
-    SkNx(T a, T b)                                : fLo(a),       fHi(b)       { REQUIRE(N==2); }
-    SkNx(T a, T b, T c, T d)                      : fLo(a,b),     fHi(c,d)     { REQUIRE(N==4); }
-    SkNx(T a, T b, T c, T d,  T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) { REQUIRE(N==8); }
-    SkNx(T a, T b, T c, T d,  T e, T f, T g, T h,
-         T i, T j, T k, T l,  T m, T n, T o, T p)
-        : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { REQUIRE(N==16); }
-
     void store(void* ptr) const {
         auto vals = (T*)ptr;
         fLo.store(vals);
         fHi.store(vals+N/2);
     }
 
-    SkNx saturatedAdd(const SkNx& o) const {
-        return SkNx(fLo.saturatedAdd(o.fLo), fHi.saturatedAdd(o.fHi));
-    }
+#define OP(op) SkNx operator op(const SkNx& o) const { return {fLo op o.fLo, fHi op o.fHi}; }
+    OP(+) OP(-) OP(*) OP(/)
+    OP(&) OP(|) OP(^)
+    OP(==) OP(!=) OP(<) OP(>) OP(<=) OP(>=)
+#undef OP
 
-    SkNx operator + (const SkNx& o) const { return SkNx(fLo + o.fLo, fHi + o.fHi); }
-    SkNx operator - (const SkNx& o) const { return SkNx(fLo - o.fLo, fHi - o.fHi); }
-    SkNx operator * (const SkNx& o) const { return SkNx(fLo * o.fLo, fHi * o.fHi); }
-    SkNx operator / (const SkNx& o) const { return SkNx(fLo / o.fLo, fHi / o.fHi); }
+#define OP(op) SkNx op() const { return {fLo.op(), fHi.op()}; }
+    OP(abs)
+    OP(sqrt) OP(rsqrt0) OP(rsqrt1) OP(rsqrt2)
+    OP(invert) OP(approxInvert)
+#undef OP
 
     SkNx operator << (int bits) const { return SkNx(fLo << bits, fHi << bits); }
     SkNx operator >> (int bits) const { return SkNx(fLo >> bits, fHi >> bits); }
 
-    SkNx operator == (const SkNx& o) const { return SkNx(fLo == o.fLo, fHi == o.fHi); }
-    SkNx operator != (const SkNx& o) const { return SkNx(fLo != o.fLo, fHi != o.fHi); }
-    SkNx operator  < (const SkNx& o) const { return SkNx(fLo  < o.fLo, fHi  < o.fHi); }
-    SkNx operator  > (const SkNx& o) const { return SkNx(fLo  > o.fLo, fHi  > o.fHi); }
-    SkNx operator <= (const SkNx& o) const { return SkNx(fLo <= o.fLo, fHi <= o.fHi); }
-    SkNx operator >= (const SkNx& o) const { return SkNx(fLo >= o.fLo, fHi >= o.fHi); }
+    SkNx saturatedAdd(const SkNx& o) const {
+        return {fLo.saturatedAdd(o.fLo), fHi.saturatedAdd(o.fHi)};
+    }
 
     static SkNx Min(const SkNx& a, const SkNx& b) {
-        return SkNx(SkNx<N/2, T>::Min(a.fLo, b.fLo), SkNx<N/2, T>::Min(a.fHi, b.fHi));
+        return {Half::Min(a.fLo, b.fLo), Half::Min(a.fHi, b.fHi)};
     }
     static SkNx Max(const SkNx& a, const SkNx& b) {
-        return SkNx(SkNx<N/2, T>::Max(a.fLo, b.fLo), SkNx<N/2, T>::Max(a.fHi, b.fHi));
+        return {Half::Max(a.fLo, b.fLo), Half::Max(a.fHi, b.fHi)};
     }
 
-    SkNx abs() const { return SkNx(fLo.abs(), fHi.abs()); }
-
-    SkNx sqrt() const { return SkNx(fLo.sqrt(), fHi.sqrt()); }
-    // Generally, increasing precision, increasing cost.
-    SkNx rsqrt0() const { return SkNx(fLo.rsqrt0(), fHi.rsqrt0()); }
-    SkNx rsqrt1() const { return SkNx(fLo.rsqrt1(), fHi.rsqrt1()); }
-    SkNx rsqrt2() const { return SkNx(fLo.rsqrt2(), fHi.rsqrt2()); }
-
-    SkNx       invert() const { return SkNx(fLo.      invert(), fHi.      invert()); }
-    SkNx approxInvert() const { return SkNx(fLo.approxInvert(), fHi.approxInvert()); }
-
-    template <int k> T kth() const {
+    T operator[](int k) const {
         SkASSERT(0 <= k && k < N);
-        return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>();
+        return k < N/2 ? fLo[k] : fHi[k-N/2];
     }
 
+    template <int k> T kth() const { return (*this)[k]; }
+
     bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); }
     bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); }
     SkNx thenElse(const SkNx& t, const SkNx& e) const {
@@ -99,17 +83,18 @@ public:
     }
 
 protected:
-    REQUIRE(0 == (N & (N-1)));
+    static_assert(0 == (N & (N-1)), "N must be a power of 2.");
 
-    SkNx<N/2, T> fLo, fHi;
+    Half fLo, fHi;
 };
 
 // Bottom out the default implementations with scalars when nothing's been specialized.
 template <typename T>
-class SkNx<1,T> {
+class SkNx<1, T> {
 public:
     SkNx() {}
     SkNx(T val) : fVal(val) {}
+
     static SkNx Load(const void* ptr) {
         auto vals = (const T*)ptr;
         return SkNx(vals[0]);
@@ -120,45 +105,41 @@ public:
         vals[0] = fVal;
     }
 
+#define OP(op) SkNx operator op(const SkNx& o) const { return fVal op o.fVal; }
+    OP(+) OP(-) OP(*) OP(/)
+    OP(&) OP(|) OP(^)
+    OP(==) OP(!=) OP(<) OP(>) OP(<=) OP(>=)
+#undef OP
+
+    SkNx operator << (int bits) const { return fVal << bits; }
+    SkNx operator >> (int bits) const { return fVal >> bits; }
+
     SkNx saturatedAdd(const SkNx& o) const {
-        SkASSERT((T)(~0) > 0); // TODO: support signed T
+        SkASSERT((T)(~0) > 0); // TODO: support signed T?
         T sum = fVal + o.fVal;
-        return SkNx(sum < fVal ? (T)(~0) : sum);
+        return sum < fVal ? (T)(~0) : sum;
     }
 
-    SkNx operator + (const SkNx& o) const { return SkNx(fVal + o.fVal); }
-    SkNx operator - (const SkNx& o) const { return SkNx(fVal - o.fVal); }
-    SkNx operator * (const SkNx& o) const { return SkNx(fVal * o.fVal); }
-    SkNx operator / (const SkNx& o) const { return SkNx(fVal / o.fVal); }
-
-    SkNx operator << (int bits) const { return SkNx(fVal << bits); }
-    SkNx operator >> (int bits) const { return SkNx(fVal >> bits); }
-
-    SkNx operator == (const SkNx& o) const { return SkNx(fVal == o.fVal); }
-    SkNx operator != (const SkNx& o) const { return SkNx(fVal != o.fVal); }
-    SkNx operator  < (const SkNx& o) const { return SkNx(fVal  < o.fVal); }
-    SkNx operator  > (const SkNx& o) const { return SkNx(fVal  > o.fVal); }
-    SkNx operator <= (const SkNx& o) const { return SkNx(fVal <= o.fVal); }
-    SkNx operator >= (const SkNx& o) const { return SkNx(fVal >= o.fVal); }
-
-    static SkNx Min(const SkNx& a, const SkNx& b) { return SkNx(SkTMin(a.fVal, b.fVal)); }
-    static SkNx Max(const SkNx& a, const SkNx& b) { return SkNx(SkTMax(a.fVal, b.fVal)); }
+    static SkNx Min(const SkNx& a, const SkNx& b) { return SkTMin(a.fVal, b.fVal); }
+    static SkNx Max(const SkNx& a, const SkNx& b) { return SkTMax(a.fVal, b.fVal); }
 
     SkNx abs() const { return SkTAbs(fVal); }
 
-    SkNx  sqrt () const { return SkNx(Sqrt(fVal)); }
-    SkNx rsqrt0() const { return this->sqrt().invert();  }
+    SkNx  sqrt () const { return Sqrt(fVal); }
+    SkNx rsqrt0() const { return this->sqrt().invert(); }
     SkNx rsqrt1() const { return this->rsqrt0(); }
     SkNx rsqrt2() const { return this->rsqrt1(); }
 
-    SkNx       invert() const { return SkNx(1) / SkNx(fVal); }
-    SkNx approxInvert() const { return this->invert();    }
+    SkNx       invert() const { return 1 / fVal; }
+    SkNx approxInvert() const { return this->invert(); }
 
-    template <int k> T kth() const {
+    T operator[](int k) const {
         SkASSERT(0 == k);
         return fVal;
     }
 
+    template <int k> T kth() const { return (*this)[k]; }
+
     bool allTrue() const { return fVal != 0; }
     bool anyTrue() const { return fVal != 0; }
     SkNx thenElse(const SkNx& t, const SkNx& e) const { return fVal != 0 ? t : e; }
@@ -170,72 +151,51 @@ protected:
     T fVal;
 };
 
-// This default implementation can be specialized by ../opts/SkNx_foo.h
-// if there's a better platform-specific shuffle strategy.
-template <typename Nx, int... Ix>
-inline Nx SkNx_shuffle_impl(const Nx& src) { return Nx( src.template kth<Ix>()... ); }
-
-// This generic shuffle can be called with 1 or N indices:
+// This generic shuffle can be called to create any valid SkNx<N,T>.
 //     Sk4f f(a,b,c,d);
-//     SkNx_shuffle<3>(f);        // ~~~> Sk4f(d,d,d,d)
-//     SkNx_shuffle<2,1,0,3>(f);  // ~~~> Sk4f(c,b,a,d)
-template <int... Ix, typename Nx>
-inline Nx SkNx_shuffle(const Nx& src) { return SkNx_shuffle_impl<Nx, Ix...>(src); }
-
-// A reminder alias that shuffles can be used to duplicate a single index across a vector.
-template <int Ix, typename Nx>
-inline Nx SkNx_dup(const Nx& src) { return SkNx_shuffle<Ix>(src); }
-
-// This is a poor-man's std::make_index_sequence from C++14.
-// I'd implement it fully, but it hurts my head.
-template <int...> struct SkIntSequence {};
-template <int N> struct MakeSkIntSequence;
-template <> struct MakeSkIntSequence< 1> : SkIntSequence<0                                    >{};
-template <> struct MakeSkIntSequence< 2> : SkIntSequence<0,1                                  >{};
-template <> struct MakeSkIntSequence< 4> : SkIntSequence<0,1,2,3                              >{};
-template <> struct MakeSkIntSequence< 8> : SkIntSequence<0,1,2,3,4,5,6,7                      >{};
-template <> struct MakeSkIntSequence<16> : SkIntSequence<0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>{};
-
-// This is the default/fallback implementation for SkNx_cast.  Best to specialize SkNx_cast!
-template <typename D, typename S, int N, int... Ix>
-SkNx<N,D> SkNx_cast_fallback(const SkNx<N,S>& src, SkIntSequence<Ix...>) {
-    return SkNx<N,D>( (D)src.template kth<Ix>()... );
-}
+//     Sk2f t = SkNx_shuffle<2,1>(f);  // ~~~> Sk2f(c,b)
+//     f = SkNx_shuffle<0,1,1,0>(t);   // ~~~> Sk4f(c,b,b,c)
+template <int... Ix, int N, typename T>
+static inline SkNx<sizeof...(Ix), T> SkNx_shuffle(const SkNx<N,T>& src) { return { src[Ix]... }; }
 
 // This is a generic cast between two SkNx with the same number of elements N.  E.g.
-//   Sk4b bs = ...;                    // Load 4 bytes.
-//   Sk4f fs = SkNx_cast<float>(bs);   // Cast each byte to a float.
-//   Sk4i is = SkNx_cast<int>(fs);     // Cast each float to int.
-// This can be specialized in ../opts/SkNx_foo.h if there's a better platform-specific cast.
-template <typename D, typename S, int N>
-SkNx<N,D> SkNx_cast(const SkNx<N,S>& src) {
-    return SkNx_cast_fallback<D,S,N>(src, MakeSkIntSequence<N>());
+//   Sk4b bs = ...;                     // Load 4 bytes.
+//   Sk4f fs = SkNx_cast<float>(bs);    // Cast each byte to a float.
+//   Sk4h hs = SkNx_cast<uint16_t>(fs); // Cast each float to uint16_t.
+template <typename D, typename S>
+static inline SkNx<2,D> SkNx_cast(const SkNx<2,S>& src) {
+    return { (D)src[0], (D)src[1] };
 }
 
-}  // namespace
+template <typename D, typename S>
+static inline SkNx<4,D> SkNx_cast(const SkNx<4,S>& src) {
+    return { (D)src[0], (D)src[1], (D)src[2], (D)src[3] };
+}
 
-typedef SkNx<2, float> Sk2f;
-typedef SkNx<4, float> Sk4f;
-typedef SkNx<8, float> Sk8f;
+template <typename D, typename S>
+static inline SkNx<8,D> SkNx_cast(const SkNx<8,S>& src) {
+    return { (D)src[0], (D)src[1], (D)src[2], (D)src[3],
+             (D)src[4], (D)src[5], (D)src[6], (D)src[7] };
+}
 
-typedef SkNx<2, double> Sk2d;
-typedef SkNx<4, double> Sk4d;
-typedef SkNx<8, double> Sk8d;
+template <typename D, typename S>
+static inline SkNx<16,D> SkNx_cast(const SkNx<16,S>& src) {
+    return { (D)src[ 0], (D)src[ 1], (D)src[ 2], (D)src[ 3],
+             (D)src[ 4], (D)src[ 5], (D)src[ 6], (D)src[ 7],
+             (D)src[ 8], (D)src[ 9], (D)src[10], (D)src[11],
+             (D)src[12], (D)src[13], (D)src[14], (D)src[15] };
+}
 
-typedef SkNx<2, SkScalar> Sk2s;
-typedef SkNx<4, SkScalar> Sk4s;
-typedef SkNx<8, SkScalar> Sk8s;
+typedef SkNx<2,     float> Sk2f;
+typedef SkNx<4,     float> Sk4f;
+typedef SkNx<2,  SkScalar> Sk2s;
+typedef SkNx<4,  SkScalar> Sk4s;
 
-typedef SkNx< 4, uint16_t> Sk4h;
-typedef SkNx< 8, uint16_t> Sk8h;
+typedef SkNx<4,   uint8_t> Sk4b;
+typedef SkNx<16,  uint8_t> Sk16b;
+typedef SkNx<4,  uint16_t> Sk4h;
 typedef SkNx<16, uint16_t> Sk16h;
 
-typedef SkNx< 4, uint8_t>  Sk4b;
-typedef SkNx< 8, uint8_t>  Sk8b;
-typedef SkNx<16, uint8_t>  Sk16b;
-
-typedef SkNx<4, int> Sk4i;
-
 // Include platform specific specializations if available.
 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
     #include "../opts/SkNx_sse.h"
@@ -251,7 +211,4 @@ typedef SkNx<4, int> Sk4i;
     }
 #endif
 
-#undef REQUIRE
-
-
 #endif//SkNx_DEFINED
author	mtklein <mtklein@chromium.org>	2016-02-09 10:35:27 -0800
committer	Commit bot <commit-bot@chromium.org>	2016-02-09 10:35:28 -0800
commit	e4c0beed744d09dae4757c1893d8caa64ee09cd2 (patch)
tree	0e35dcab1c2ab7a1b75609c6dd1dd11231a572eb /src/core/SkNx.h
parent	f1d415188ffb4c34e2886c2cfceb363a148333f1 (diff)