diff options
author | mtklein <mtklein@google.com> | 2015-06-24 14:30:43 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-06-24 14:30:43 -0700 |
commit | 0cc1f0a8d5ed69c76d75061bc2dee3b1d0ce0605 (patch) | |
tree | f3978f17559721a0e69c556d975d057986678e70 /src | |
parent | b9d4163bebab0f5639f9c5928bb5fc15f472dddc (diff) |
Revert of Implement four more xfermodes with Sk4px. (patchset #16 id:290001 of https://codereview.chromium.org/1196713004/)
Reason for revert:
64-bit ARM build failures.
Original issue's description:
> Implement four more xfermodes with Sk4px.
>
> HardLight, Overlay, Darken, and Lighten are all
> ~2x faster with SSE, ~25% faster with NEON.
>
> This covers all previously-implemented NEON xfermodes.
> 3 previous SSE xfermodes remain. Those need division
> and sqrt, so I'm planning on using SkPMFloat for them.
> It'll help the readability and NEON speed if I move that
> into [0,1] space first.
>
> The main new concept here is c.thenElse(t,e), which behaves like
> (c ? t : e) except, of course, both t and e are evaluated. This allows
> us to emulate conditionals with vectors.
>
> This also removes the concept of SkNb. Instead of a standalone bool
> vector, each SkNi or SkNf will just return their own types for
> comparisons. Turns out to be a lot more manageable this way.
>
> BUG=skia:
>
> Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
TBR=reed@google.com,mtklein@chromium.org
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=skia:
Review URL: https://codereview.chromium.org/1205703008
Diffstat (limited to 'src')
-rw-r--r-- | src/core/Sk4px.h | 2 |
-rw-r--r-- | src/core/Sk4pxXfermode.h | 60 |
-rw-r--r-- | src/core/SkNx.h | 92 |
-rw-r--r-- | src/opts/SkNx_neon.h | 118 |
-rw-r--r-- | src/opts/SkNx_sse.h | 94 |
-rw-r--r-- | src/opts/SkXfermode_opts_SSE2.cpp | 8 |
-rw-r--r-- | src/opts/SkXfermode_opts_arm_neon.cpp | 1 |
7 files changed, 178 insertions, 197 deletions
diff --git a/src/core/Sk4px.h b/src/core/Sk4px.h index e046e265fe..26d4d0f865 100644 --- a/src/core/Sk4px.h +++ b/src/core/Sk4px.h @@ -85,8 +85,6 @@ public: // These just keep the types as Sk4px so the user doesn't have to keep casting. Sk4px operator + (const Sk4px& o) const { return INHERITED::operator+(o); } Sk4px operator - (const Sk4px& o) const { return INHERITED::operator-(o); } - Sk4px operator < (const Sk4px& o) const { return INHERITED::operator<(o); } - Sk4px thenElse(const Sk4px& t, const Sk4px& e) const { return INHERITED::thenElse(t,e); } // Generally faster than (*this * o).div255(). // May be incorrect by +-1, but is always exactly correct when *this or o is 0 or 255. diff --git a/src/core/Sk4pxXfermode.h b/src/core/Sk4pxXfermode.h index 09490dc990..b4ebd850e3 100644 --- a/src/core/Sk4pxXfermode.h +++ b/src/core/Sk4pxXfermode.h @@ -60,44 +60,6 @@ XFERMODE(Exclusion) { return (s - p) + (d - p.zeroAlphas()); } -XFERMODE(HardLight) { - auto alphas = SrcOver::Xfer(s,d); - - auto sa = s.alphas(), - da = d.alphas(); - - auto isDark = s < (sa-s); - - auto dark = s*d << 1, - lite = sa*da - ((da-d)*(sa-s) << 1), - both = s*da.inv() + d*sa.inv(); - - // TODO: do isDark in 16-bit so we only have to div255() once. 
- auto colors = isDark.thenElse((dark + both).div255(), - (lite + both).div255()); - return alphas.zeroColors() + colors.zeroAlphas(); -} -XFERMODE(Overlay) { return HardLight::Xfer(d,s); } - -XFERMODE(Darken) { - auto sda = s.approxMulDiv255(d.alphas()), - dsa = d.approxMulDiv255(s.alphas()); - auto srcover = s + (d - dsa), - dstover = d + (s - sda); - auto alphas = srcover, - colors = (sda < dsa).thenElse(srcover, dstover); - return alphas.zeroColors() + colors.zeroAlphas(); -} -XFERMODE(Lighten) { - auto sda = s.approxMulDiv255(d.alphas()), - dsa = d.approxMulDiv255(s.alphas()); - auto srcover = s + (d - dsa), - dstover = d + (s - sda); - auto alphas = srcover, - colors = (sda < dsa).thenElse(dstover, srcover); - return alphas.zeroColors() + colors.zeroAlphas(); -} - #undef XFERMODE // A reasonable fallback mode for doing AA is to simply apply the transfermode first, @@ -109,15 +71,17 @@ static Sk4px xfer_aa(const Sk4px& s, const Sk4px& d, const Sk4px& aa) { } // For some transfermodes we specialize AA, either for correctness or performance. -#define XFERMODE_AA(Name) \ - template <> Sk4px xfer_aa<Name>(const Sk4px& s, const Sk4px& d, const Sk4px& aa) +#ifndef SK_NO_SPECIALIZED_AA_XFERMODES + #define XFERMODE_AA(Name) \ + template <> Sk4px xfer_aa<Name>(const Sk4px& s, const Sk4px& d, const Sk4px& aa) -// Plus' clamp needs to happen after AA. skia:3852 -XFERMODE_AA(Plus) { // [ clamp( (1-AA)D + (AA)(S+D) ) == clamp(D + AA*S) ] - return d.saturatedAdd(s.approxMulDiv255(aa)); -} + // Plus' clamp needs to happen after AA. 
skia:3852 + XFERMODE_AA(Plus) { // [ clamp( (1-AA)D + (AA)(S+D) ) == clamp(D + AA*S) ] + return d.saturatedAdd(s.approxMulDiv255(aa)); + } -#undef XFERMODE_AA + #undef XFERMODE_AA +#endif template <typename ProcType> class SkT4pxXfermode : public SkProcCoeffXfermode { @@ -166,12 +130,6 @@ static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode case SkXfermode::kMultiply_Mode: return SkT4pxXfermode<Multiply>::Create(rec); case SkXfermode::kDifference_Mode: return SkT4pxXfermode<Difference>::Create(rec); case SkXfermode::kExclusion_Mode: return SkT4pxXfermode<Exclusion>::Create(rec); -#if !defined(SK_SUPPORT_LEGACY_XFERMODES) // For staging in Chrome (layout tests). - case SkXfermode::kHardLight_Mode: return SkT4pxXfermode<HardLight>::Create(rec); - case SkXfermode::kOverlay_Mode: return SkT4pxXfermode<Overlay>::Create(rec); - case SkXfermode::kDarken_Mode: return SkT4pxXfermode<Darken>::Create(rec); - case SkXfermode::kLighten_Mode: return SkT4pxXfermode<Lighten>::Create(rec); -#endif default: break; } #endif diff --git a/src/core/SkNx.h b/src/core/SkNx.h index dadb3ec132..1342266b93 100644 --- a/src/core/SkNx.h +++ b/src/core/SkNx.h @@ -26,6 +26,22 @@ namespace { // The default implementations just fall back on a pair of size N/2. +// SkNb is a _very_ minimal class representing a vector of bools returned by comparison operators. +// We pass along the byte size of the compared types (Bytes) to help platform specializations. 
+template <int N, int Bytes> +class SkNb { +public: + SkNb() {} + SkNb(const SkNb<N/2, Bytes>& lo, const SkNb<N/2, Bytes>& hi) : fLo(lo), fHi(hi) {} + + bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } + bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } + +protected: + REQUIRE(0 == (N & (N-1))); + SkNb<N/2, Bytes> fLo, fHi; +}; + template <int N, typename T> class SkNi { public: @@ -62,19 +78,14 @@ public: static SkNi Min(const SkNi& a, const SkNi& b) { return SkNi(SkNi<N/2, T>::Min(a.fLo, b.fLo), SkNi<N/2, T>::Min(a.fHi, b.fHi)); } - SkNi operator < (const SkNi& o) const { return SkNi(fLo < o.fLo, fHi < o.fHi); } + + // TODO: comparisons, max? template <int k> T kth() const { SkASSERT(0 <= k && k < N); return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>(); } - bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } - bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } - SkNi thenElse(const SkNi& t, const SkNi& e) const { - return SkNi(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi)); - } - protected: REQUIRE(0 == (N & (N-1))); @@ -83,9 +94,11 @@ protected: template <int N, typename T> class SkNf { + typedef SkNb<N, sizeof(T)> Nb; + static int32_t MyNi(float); static int64_t MyNi(double); - typedef decltype(MyNi(T())) I; + typedef SkNi<N, decltype(MyNi(T()))> Ni; public: SkNf() {} explicit SkNf(T val) : fLo(val), fHi(val) {} @@ -102,19 +115,19 @@ public: fHi.store(vals+N/2); } - SkNi<N,I> castTrunc() const { return SkNi<N,I>(fLo.castTrunc(), fHi.castTrunc()); } + Ni castTrunc() const { return Ni(fLo.castTrunc(), fHi.castTrunc()); } SkNf operator + (const SkNf& o) const { return SkNf(fLo + o.fLo, fHi + o.fHi); } SkNf operator - (const SkNf& o) const { return SkNf(fLo - o.fLo, fHi - o.fHi); } SkNf operator * (const SkNf& o) const { return SkNf(fLo * o.fLo, fHi * o.fHi); } SkNf operator / (const SkNf& o) const { return SkNf(fLo / o.fLo, fHi / o.fHi); } - SkNf operator == (const SkNf& o) const { 
return SkNf(fLo == o.fLo, fHi == o.fHi); } - SkNf operator != (const SkNf& o) const { return SkNf(fLo != o.fLo, fHi != o.fHi); } - SkNf operator < (const SkNf& o) const { return SkNf(fLo < o.fLo, fHi < o.fHi); } - SkNf operator > (const SkNf& o) const { return SkNf(fLo > o.fLo, fHi > o.fHi); } - SkNf operator <= (const SkNf& o) const { return SkNf(fLo <= o.fLo, fHi <= o.fHi); } - SkNf operator >= (const SkNf& o) const { return SkNf(fLo >= o.fLo, fHi >= o.fHi); } + Nb operator == (const SkNf& o) const { return Nb(fLo == o.fLo, fHi == o.fHi); } + Nb operator != (const SkNf& o) const { return Nb(fLo != o.fLo, fHi != o.fHi); } + Nb operator < (const SkNf& o) const { return Nb(fLo < o.fLo, fHi < o.fHi); } + Nb operator > (const SkNf& o) const { return Nb(fLo > o.fLo, fHi > o.fHi); } + Nb operator <= (const SkNf& o) const { return Nb(fLo <= o.fLo, fHi <= o.fHi); } + Nb operator >= (const SkNf& o) const { return Nb(fLo >= o.fLo, fHi >= o.fHi); } static SkNf Min(const SkNf& l, const SkNf& r) { return SkNf(SkNf<N/2,T>::Min(l.fLo, r.fLo), SkNf<N/2,T>::Min(l.fHi, r.fHi)); @@ -138,12 +151,6 @@ public: return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>(); } - bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } - bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } - SkNf thenElse(const SkNf& t, const SkNf& e) const { - return SkNf(fLo.thenElse(t.fLo, t.fHi), fHi.thenElse(t.fLo, t.fHi)); - } - protected: REQUIRE(0 == (N & (N-1))); SkNf(const SkNf<N/2, T>& lo, const SkNf<N/2, T>& hi) : fLo(lo), fHi(hi) {} @@ -154,6 +161,17 @@ protected: // Bottom out the default implementations with scalars when nothing's been specialized. 
+template <int Bytes> +class SkNb<1, Bytes> { +public: + SkNb() {} + explicit SkNb(bool val) : fVal(val) {} + bool allTrue() const { return fVal; } + bool anyTrue() const { return fVal; } +protected: + bool fVal; +}; + template <typename T> class SkNi<1,T> { public: @@ -177,26 +195,23 @@ public: SkNi operator >> (int bits) const { return SkNi(fVal >> bits); } static SkNi Min(const SkNi& a, const SkNi& b) { return SkNi(SkTMin(a.fVal, b.fVal)); } - SkNi operator <(const SkNi& o) const { return SkNi(fVal < o.fVal); } template <int k> T kth() const { SkASSERT(0 == k); return fVal; } - bool allTrue() const { return fVal; } - bool anyTrue() const { return fVal; } - SkNi thenElse(const SkNi& t, const SkNi& e) const { return fVal ? t : e; } - protected: T fVal; }; template <typename T> class SkNf<1,T> { + typedef SkNb<1, sizeof(T)> Nb; + static int32_t MyNi(float); static int64_t MyNi(double); - typedef decltype(MyNi(T())) I; + typedef SkNi<1, decltype(MyNi(T()))> Ni; public: SkNf() {} explicit SkNf(T val) : fVal(val) {} @@ -204,19 +219,19 @@ public: void store(T vals[1]) const { vals[0] = fVal; } - SkNi<1,I> castTrunc() const { return SkNi<1,I>(fVal); } + Ni castTrunc() const { return Ni(fVal); } SkNf operator + (const SkNf& o) const { return SkNf(fVal + o.fVal); } SkNf operator - (const SkNf& o) const { return SkNf(fVal - o.fVal); } SkNf operator * (const SkNf& o) const { return SkNf(fVal * o.fVal); } SkNf operator / (const SkNf& o) const { return SkNf(fVal / o.fVal); } - SkNf operator == (const SkNf& o) const { return SkNf(fVal == o.fVal); } - SkNf operator != (const SkNf& o) const { return SkNf(fVal != o.fVal); } - SkNf operator < (const SkNf& o) const { return SkNf(fVal < o.fVal); } - SkNf operator > (const SkNf& o) const { return SkNf(fVal > o.fVal); } - SkNf operator <= (const SkNf& o) const { return SkNf(fVal <= o.fVal); } - SkNf operator >= (const SkNf& o) const { return SkNf(fVal >= o.fVal); } + Nb operator == (const SkNf& o) const { return Nb(fVal == o.fVal); } 
+ Nb operator != (const SkNf& o) const { return Nb(fVal != o.fVal); } + Nb operator < (const SkNf& o) const { return Nb(fVal < o.fVal); } + Nb operator > (const SkNf& o) const { return Nb(fVal > o.fVal); } + Nb operator <= (const SkNf& o) const { return Nb(fVal <= o.fVal); } + Nb operator >= (const SkNf& o) const { return Nb(fVal >= o.fVal); } static SkNf Min(const SkNf& l, const SkNf& r) { return SkNf(SkTMin(l.fVal, r.fVal)); } static SkNf Max(const SkNf& l, const SkNf& r) { return SkNf(SkTMax(l.fVal, r.fVal)); } @@ -234,21 +249,12 @@ public: return fVal; } - bool allTrue() const { return this->pun(); } - bool anyTrue() const { return this->pun(); } - SkNf thenElse(const SkNf& t, const SkNf& e) const { return this->pun() ? t : e; } - protected: // We do double sqrts natively, or via floats for any other type. template <typename U> static U Sqrt(U val) { return (U) ::sqrtf((float)val); } static double Sqrt(double val) { return ::sqrt ( val); } - I pun() const { - union { T f; I i; } pun = { fVal }; - return pun.i; - } - T fVal; }; diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index d1760b3d94..da926e0b4c 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -33,7 +33,34 @@ namespace { // See SkNx.h case 31: return op(v, 31); } return fVec template <> +class SkNb<2, 4> { +public: + SkNb(uint32x2_t vec) : fVec(vec) {} + + SkNb() {} + bool allTrue() const { return vget_lane_u32(fVec, 0) && vget_lane_u32(fVec, 1); } + bool anyTrue() const { return vget_lane_u32(fVec, 0) || vget_lane_u32(fVec, 1); } + + uint32x2_t fVec; +}; + +template <> +class SkNb<4, 4> { +public: + SkNb(uint32x4_t vec) : fVec(vec) {} + + SkNb() {} + bool allTrue() const { return vgetq_lane_u32(fVec, 0) && vgetq_lane_u32(fVec, 1) + && vgetq_lane_u32(fVec, 2) && vgetq_lane_u32(fVec, 3); } + bool anyTrue() const { return vgetq_lane_u32(fVec, 0) || vgetq_lane_u32(fVec, 1) + || vgetq_lane_u32(fVec, 2) || vgetq_lane_u32(fVec, 3); } + + uint32x4_t fVec; +}; + +template <> class SkNf<2, 
float> { + typedef SkNb<2, 4> Nb; public: SkNf(float32x2_t vec) : fVec(vec) {} @@ -66,14 +93,12 @@ public: #endif } - SkNf operator == (const SkNf& o) const { return vreinterpret_f32_u32(vceq_f32(fVec, o.fVec)); } - SkNf operator < (const SkNf& o) const { return vreinterpret_f32_u32(vclt_f32(fVec, o.fVec)); } - SkNf operator > (const SkNf& o) const { return vreinterpret_f32_u32(vcgt_f32(fVec, o.fVec)); } - SkNf operator <= (const SkNf& o) const { return vreinterpret_f32_u32(vcle_f32(fVec, o.fVec)); } - SkNf operator >= (const SkNf& o) const { return vreinterpret_f32_u32(vcge_f32(fVec, o.fVec)); } - SkNf operator != (const SkNf& o) const { - return vreinterpret_f32_u32(vmvn_u32(vceq_f32(fVec, o.fVec))); - } + Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); } + Nb operator < (const SkNf& o) const { return vclt_f32(fVec, o.fVec); } + Nb operator > (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); } + Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); } + Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); } + Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec)); } static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); } @@ -101,21 +126,25 @@ public: return vget_lane_f32(fVec, k&1); } - bool allTrue() const { - auto v = vreinterpret_u32_f32(fVec); - return vget_lane_u32(v,0) && vget_lane_u32(v,1); - } - bool anyTrue() const { - auto v = vreinterpret_u32_f32(fVec); - return vget_lane_u32(v,0) || vget_lane_u32(v,1); - } - float32x2_t fVec; }; #if defined(SK_CPU_ARM64) template <> +class SkNb<2, 8> { +public: + SkNb(uint64x2_t vec) : fVec(vec) {} + + SkNb() {} + bool allTrue() const { return vgetq_lane_u64(fVec, 0) && vgetq_lane_u64(fVec, 1); } + bool anyTrue() const { return vgetq_lane_u64(fVec, 0) || vgetq_lane_u64(fVec, 1); } + + uint64x2_t fVec; +}; + +template <> class 
SkNf<2, double> { + typedef SkNb<2, 8> Nb; public: SkNf(float64x2_t vec) : fVec(vec) {} @@ -131,13 +160,13 @@ public: SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); } SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); } - SkNf operator==(const SkNf& o) const { return vreinterpretq_f64_u64(vceqq_f64(fVec, o.fVec)); } - SkNf operator <(const SkNf& o) const { return vreinterpretq_f64_u64(vcltq_f64(fVec, o.fVec)); } - SkNf operator >(const SkNf& o) const { return vreinterpretq_f64_u64(vcgtq_f64(fVec, o.fVec)); } - SkNf operator<=(const SkNf& o) const { return vreinterpretq_f64_u64(vcleq_f64(fVec, o.fVec)); } - SkNf operator>=(const SkNf& o) const { return vreinterpretq_f64_u64(vcgeq_f64(fVec, o.fVec)); } - SkNf operator != (const SkNf& o) const { - return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec)))); + Nb operator == (const SkNf& o) const { return vceqq_f64(fVec, o.fVec); } + Nb operator < (const SkNf& o) const { return vcltq_f64(fVec, o.fVec); } + Nb operator > (const SkNf& o) const { return vcgtq_f64(fVec, o.fVec); } + Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); } + Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); } + Nb operator != (const SkNf& o) const { + return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec)))); } static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); } @@ -173,15 +202,6 @@ public: return vgetq_lane_f64(fVec, k&1); } - bool allTrue() const { - auto v = vreinterpretq_u64_f64(fVec); - return vgetq_lane_u64(v,0) && vgetq_lane_u64(v,1); - } - bool anyTrue() const { - auto v = vreinterpretq_u64_f64(fVec); - return vgetq_lane_u64(v,0) || vgetq_lane_u64(v,1); - } - float64x2_t fVec; }; #endif//defined(SK_CPU_ARM64) @@ -215,6 +235,7 @@ public: template <> class SkNf<4, float> { + typedef SkNb<4, 4> Nb; public: SkNf(float32x4_t vec) : fVec(vec) {} @@ -249,14 +270,12 @@ 
public: #endif } - SkNf operator==(const SkNf& o) const { return vreinterpretq_f32_u32(vceqq_f32(fVec, o.fVec)); } - SkNf operator <(const SkNf& o) const { return vreinterpretq_f32_u32(vcltq_f32(fVec, o.fVec)); } - SkNf operator >(const SkNf& o) const { return vreinterpretq_f32_u32(vcgtq_f32(fVec, o.fVec)); } - SkNf operator<=(const SkNf& o) const { return vreinterpretq_f32_u32(vcleq_f32(fVec, o.fVec)); } - SkNf operator>=(const SkNf& o) const { return vreinterpretq_f32_u32(vcgeq_f32(fVec, o.fVec)); } - SkNf operator!=(const SkNf& o) const { - return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); - } + Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); } + Nb operator < (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); } + Nb operator > (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); } + Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); } + Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); } + Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fVec)); } static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); } @@ -284,17 +303,6 @@ public: return vgetq_lane_f32(fVec, k&3); } - bool allTrue() const { - auto v = vreinterpretq_u32_f32(fVec); - return vgetq_lane_u32(v,0) && vgetq_lane_u32(v,1) - && vgetq_lane_u32(v,2) && vgetq_lane_u32(v,3); - } - bool anyTrue() const { - auto v = vreinterpretq_u32_f32(fVec); - return vgetq_lane_u32(v,0) || vgetq_lane_u32(v,1) - || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); - } - float32x4_t fVec; }; @@ -355,18 +363,12 @@ public: SkNi operator - (const SkNi& o) const { return vsubq_u8(fVec, o.fVec); } static SkNi Min(const SkNi& a, const SkNi& b) { return vminq_u8(a.fVec, b.fVec); } - SkNi operator < (const SkNi& o) const { return vcltq_u8(fVec, o.fVec); } template <int k> uint8_t kth() const { SkASSERT(0 <= k && k < 
15); return vgetq_lane_u8(fVec, k&16); } - SkNi thenElse(const SkNi& t, const SkNi& e) const { - return vorrq_u8(vandq_u8(t.fVec, fVec), - vbicq_u8(e.fVec, fVec)); - } - uint8x16_t fVec; }; diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index 9b4de700ee..12a4719d0c 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -12,9 +12,46 @@ namespace { // See SkNx.h +template <> +class SkNb<2, 4> { +public: + SkNb(const __m128i& vec) : fVec(vec) {} + + SkNb() {} + bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); } + bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); } + + __m128i fVec; +}; + +template <> +class SkNb<4, 4> { +public: + SkNb(const __m128i& vec) : fVec(vec) {} + + SkNb() {} + bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } + bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } + + __m128i fVec; +}; + +template <> +class SkNb<2, 8> { +public: + SkNb(const __m128i& vec) : fVec(vec) {} + + SkNb() {} + bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } + bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } + + __m128i fVec; +}; + template <> class SkNf<2, float> { + typedef SkNb<2, 4> Nb; public: SkNf(const __m128& vec) : fVec(vec) {} @@ -32,12 +69,12 @@ public: SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } - SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } - SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } - SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec); } - SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } - SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec); } - SkNf operator >= (const SkNf& o) const { return _mm_cmpge_ps (fVec, o.fVec); } + Nb operator == (const SkNf& o) const { return 
_mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); } + Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); } + Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); } + Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); } + Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); } + Nb operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); } static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } @@ -56,14 +93,12 @@ public: return pun.fs[k&1]; } - bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } - bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } - __m128 fVec; }; template <> class SkNf<2, double> { + typedef SkNb<2, 8> Nb; public: SkNf(const __m128d& vec) : fVec(vec) {} @@ -79,12 +114,12 @@ public: SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } - SkNf operator == (const SkNf& o) const { return _mm_cmpeq_pd (fVec, o.fVec); } - SkNf operator != (const SkNf& o) const { return _mm_cmpneq_pd(fVec, o.fVec); } - SkNf operator < (const SkNf& o) const { return _mm_cmplt_pd (fVec, o.fVec); } - SkNf operator > (const SkNf& o) const { return _mm_cmpgt_pd (fVec, o.fVec); } - SkNf operator <= (const SkNf& o) const { return _mm_cmple_pd (fVec, o.fVec); } - SkNf operator >= (const SkNf& o) const { return _mm_cmpge_pd (fVec, o.fVec); } + Nb operator == (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpeq_pd (fVec, o.fVec)); } + Nb operator != (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpneq_pd(fVec, o.fVec)); } + Nb operator < (const SkNf& o) const { return 
_mm_castpd_si128(_mm_cmplt_pd (fVec, o.fVec)); } + Nb operator > (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpgt_pd (fVec, o.fVec)); } + Nb operator <= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmple_pd (fVec, o.fVec)); } + Nb operator >= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpge_pd (fVec, o.fVec)); } static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); } @@ -103,9 +138,6 @@ public: return pun.ds[k&1]; } - bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castpd_si128(fVec)); } - bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castpd_si128(fVec)); } - __m128d fVec; }; @@ -149,6 +181,7 @@ public: template <> class SkNf<4, float> { + typedef SkNb<4, 4> Nb; public: SkNf(const __m128& vec) : fVec(vec) {} @@ -166,12 +199,12 @@ public: SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } - SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } - SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } - SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec); } - SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } - SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec); } - SkNf operator >= (const SkNf& o) const { return _mm_cmpge_ps (fVec, o.fVec); } + Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); } + Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); } + Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); } + Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); } + Nb operator <= (const SkNf& o) const { return 
_mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); } + Nb operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); } static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } @@ -190,9 +223,6 @@ public: return pun.fs[k&3]; } - bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(fVec)); } - bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(fVec)); } - __m128 fVec; }; @@ -282,11 +312,6 @@ public: SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); } static SkNi Min(const SkNi& a, const SkNi& b) { return _mm_min_epu8(a.fVec, b.fVec); } - SkNi operator < (const SkNi& o) const { - // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use a signed compare. - auto flip = _mm_set1_epi8(char(0x80)); - return _mm_cmplt_epi8(_mm_xor_si128(flip, fVec), _mm_xor_si128(flip, o.fVec)); - } template <int k> uint8_t kth() const { SkASSERT(0 <= k && k < 16); @@ -295,11 +320,6 @@ public: return k % 2 == 0 ? pair : (pair >> 8); } - SkNi thenElse(const SkNi& t, const SkNi& e) const { - return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), - _mm_andnot_si128(fVec, e.fVec)); - } - __m128i fVec; }; diff --git a/src/opts/SkXfermode_opts_SSE2.cpp b/src/opts/SkXfermode_opts_SSE2.cpp index f8772808a7..b92477094b 100644 --- a/src/opts/SkXfermode_opts_SSE2.cpp +++ b/src/opts/SkXfermode_opts_SSE2.cpp @@ -515,17 +515,15 @@ void SkSSE2ProcCoeffXfermode::toString(SkString* str) const { SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, SkXfermode::Mode mode) { SkXfermodeProcSIMD proc = nullptr; + // TODO(mtklein): implement these Sk4px. switch (mode) { - // TODO(mtklein): Sk4pxXfermode has these now. Clean up. 
case SkProcCoeffXfermode::kOverlay_Mode: proc = overlay_modeproc_SSE2; break; case SkProcCoeffXfermode::kDarken_Mode: proc = darken_modeproc_SSE2; break; case SkProcCoeffXfermode::kLighten_Mode: proc = lighten_modeproc_SSE2; break; - case SkProcCoeffXfermode::kHardLight_Mode: proc = hardlight_modeproc_SSE2; break; - - // TODO(mtklein): implement these with SkPMFloat. - case SkProcCoeffXfermode::kSoftLight_Mode: proc = softlight_modeproc_SSE2; break; case SkProcCoeffXfermode::kColorDodge_Mode: proc = colordodge_modeproc_SSE2; break; case SkProcCoeffXfermode::kColorBurn_Mode: proc = colorburn_modeproc_SSE2; break; + case SkProcCoeffXfermode::kHardLight_Mode: proc = hardlight_modeproc_SSE2; break; + case SkProcCoeffXfermode::kSoftLight_Mode: proc = softlight_modeproc_SSE2; break; default: break; } return proc ? SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, (void*)proc)) : nullptr; diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp index 205a00b4ce..1759429c57 100644 --- a/src/opts/SkXfermode_opts_arm_neon.cpp +++ b/src/opts/SkXfermode_opts_arm_neon.cpp @@ -1016,7 +1016,6 @@ SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, if (auto xfermode = SkCreate4pxXfermode(rec, mode)) { return xfermode; } - // TODO: Sk4pxXfermode now covers every mode found in this file. Delete them all! if (auto proc = gNEONXfermodeProcs[mode]) { return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, (void*)proc)); } |