From 3a2e45a6ed50c07cb1a710d3f7b74be796e61251 Mon Sep 17 00:00:00 2001 From: fmalita Date: Fri, 14 Oct 2016 08:18:24 -0700 Subject: Faster 4f gradient premul path Similar to https://codereview.chromium.org/2409583003/, perform the premul in 4f. It turns out it's even faster to avoid the 255 load multiplication in this case. Also includes some template plumbing because DstTraits<>::load now needs to be premul-aware (previously it wasn't). R=reed@google.com GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2416233002 Review-Url: https://codereview.chromium.org/2416233002 --- src/effects/gradients/Sk4fGradientPriv.h | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'src/effects/gradients/Sk4fGradientPriv.h') diff --git a/src/effects/gradients/Sk4fGradientPriv.h b/src/effects/gradients/Sk4fGradientPriv.h index 65fa821e85..b8f4bcaee1 100644 --- a/src/effects/gradients/Sk4fGradientPriv.h +++ b/src/effects/gradients/Sk4fGradientPriv.h @@ -29,17 +29,6 @@ enum class DstType { F32, // Linear float. Used for shaders only. }; -template -inline SkPMColor trunc_from_4f_255(const Sk4f& c) { - SkPMColor pmc; - SkNx_cast(c).store(&pmc); - if (premul == ApplyPremul::True) { - pmc = SkPreMultiplyARGB(SkGetPackedA32(pmc), SkGetPackedR32(pmc), - SkGetPackedG32(pmc), SkGetPackedB32(pmc)); - } - return pmc; -} - template struct PremulTraits; @@ -69,24 +58,34 @@ struct PremulTraits { // // - store4x() Store 4 Sk4f values to dest (opportunistic optimization). // -template +template struct DstTraits; template struct DstTraits { + using PM = PremulTraits; using Type = SkPMColor; - // For L32, we prescale the values by 255 to save a per-pixel multiplication. + // For L32, prescaling by 255 saves a per-pixel multiplication when premul is not needed. static Sk4f load(const SkPM4f& c) { - return c.to4f_pmorder() * Sk4f(255); + return premul == ApplyPremul::False + ? 
c.to4f_pmorder() * Sk4f(255) + : c.to4f_pmorder(); } static void store(const Sk4f& c, Type* dst) { - *dst = trunc_from_4f_255<premul>(c); + if (premul == ApplyPremul::False) { + // c is prescaled by 255, just store. + SkNx_cast<uint8_t>(c).store(dst); + } else { + *dst = Sk4f_toL32(PM::apply(c)); + } } static void store(const Sk4f& c, Type* dst, int n) { - sk_memset32(dst, trunc_from_4f_255<premul>(c), n); + Type pmc; + store(c, &pmc); + sk_memset32(dst, pmc, n); } static void store4x(const Sk4f& c0, const Sk4f& c1, -- cgit v1.2.3