diff options
author | fmalita <fmalita@chromium.org> | 2016-10-14 08:18:24 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-10-14 08:18:24 -0700 |
commit | 3a2e45a6ed50c07cb1a710d3f7b74be796e61251 (patch) | |
tree | f8e903ce7b3f81feda1ca7f44fc0a151549fa463 /src/effects/gradients/Sk4fGradientPriv.h | |
parent | 11abd8d6cb2887bf66711863fb2dfe47da86d979 (diff) |
Faster 4f gradient premul path
Similar to https://codereview.chromium.org/2409583003/, perform the
premul in 4f. It turns out it's even faster to skip the multiplication by 255
at load time in this case.
Also includes some template plumbing because DstTraits<>::load now needs
to be premul-aware (previously it wasn't).
R=reed@google.com
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2416233002
Review-Url: https://codereview.chromium.org/2416233002
Diffstat (limited to 'src/effects/gradients/Sk4fGradientPriv.h')
-rw-r--r-- | src/effects/gradients/Sk4fGradientPriv.h | 31 |
1 files changed, 15 insertions, 16 deletions
diff --git a/src/effects/gradients/Sk4fGradientPriv.h b/src/effects/gradients/Sk4fGradientPriv.h
index 65fa821e85..b8f4bcaee1 100644
--- a/src/effects/gradients/Sk4fGradientPriv.h
+++ b/src/effects/gradients/Sk4fGradientPriv.h
@@ -29,17 +29,6 @@ enum class DstType {
     F32, // Linear float. Used for shaders only.
 };

-template <ApplyPremul premul>
-inline SkPMColor trunc_from_4f_255(const Sk4f& c) {
-    SkPMColor pmc;
-    SkNx_cast<uint8_t>(c).store(&pmc);
-    if (premul == ApplyPremul::True) {
-        pmc = SkPreMultiplyARGB(SkGetPackedA32(pmc), SkGetPackedR32(pmc),
-                                SkGetPackedG32(pmc), SkGetPackedB32(pmc));
-    }
-    return pmc;
-}
-
 template <ApplyPremul>
 struct PremulTraits;

@@ -69,24 +58,34 @@ struct PremulTraits<ApplyPremul::True> {
 //
 // - store4x() Store 4 Sk4f values to dest (opportunistic optimization).
 //
-template <DstType, ApplyPremul premul = ApplyPremul::False>
+template <DstType, ApplyPremul premul>
 struct DstTraits;

 template <ApplyPremul premul>
 struct DstTraits<DstType::L32, premul> {
+    using PM = PremulTraits<premul>;
     using Type = SkPMColor;

-    // For L32, we prescale the values by 255 to save a per-pixel multiplication.
+    // For L32, prescaling by 255 saves a per-pixel multiplication when premul is not needed.
     static Sk4f load(const SkPM4f& c) {
-        return c.to4f_pmorder() * Sk4f(255);
+        return premul == ApplyPremul::False
+            ? c.to4f_pmorder() * Sk4f(255)
+            : c.to4f_pmorder();
     }

     static void store(const Sk4f& c, Type* dst) {
-        *dst = trunc_from_4f_255<premul>(c);
+        if (premul == ApplyPremul::False) {
+            // c is prescaled by 255, just store.
+            SkNx_cast<uint8_t>(c).store(dst);
+        } else {
+            *dst = Sk4f_toL32(PM::apply(c));
+        }
     }

     static void store(const Sk4f& c, Type* dst, int n) {
-        sk_memset32(dst, trunc_from_4f_255<premul>(c), n);
+        Type pmc;
+        store(c, &pmc);
+        sk_memset32(dst, pmc, n);
     }

     static void store4x(const Sk4f& c0, const Sk4f& c1,