From bf907e628fe245305d0f987aed8b8ecff8356374 Mon Sep 17 00:00:00 2001
From: reed
Date: Tue, 2 Feb 2016 11:00:55 -0800
Subject: unroll srcover_1 for blending a single color
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before:

curr/maxrss  loops  min     median  mean    max     stddev  samples     config        bench
8/8 MB       1      1.59ms  1.82ms  1.89ms  2.59ms  14%     ▁█▃▃▃▃▃▃▃▃  nonrendering  xfer4f_srcover_1_alpha_linear
8/8 MB       1      3.25ms  4.25ms  4.16ms  5.87ms  21%     ▁▅▂▁▁▄█▄▅▂  nonrendering  xfer4f_srcover_1_alpha_srgb

After:

curr/maxrss  loops  min     median  mean    max     stddev  samples     config        bench
8/8 MB       1      915µs   915µs   946µs   1.02ms  4%      █▄▇▁▁▁▆▁▁▁  nonrendering  xfer4f_srcover_1_alpha_linear
8/8 MB       1      2.69ms  3.08ms  3.03ms  3.63ms  10%     ▁▃▂▁▁█▄▄▄▆  nonrendering  xfer4f_srcover_1_alpha_srgb

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1653943002

Review URL: https://codereview.chromium.org/1653943002
---
 src/core/SkXfermode4f.cpp | 97 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 82 insertions(+), 15 deletions(-)

(limited to 'src/core/SkXfermode4f.cpp')

diff --git a/src/core/SkXfermode4f.cpp b/src/core/SkXfermode4f.cpp
index 0485a5e6ed..1bf66a2b69 100644
--- a/src/core/SkXfermode4f.cpp
+++ b/src/core/SkXfermode4f.cpp
@@ -31,10 +31,22 @@ template <DstType D> Sk4f load_dst(SkPMColor dstC) {
     return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC);
 }
 
+static Sk4f srgb_4b_to_linear_unit(SkPMColor dstC) {
+    return Sk4f_fromS32(dstC);
+}
+
 template <DstType D> uint32_t store_dst(const Sk4f& x4) {
     return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4);
 }
 
+static uint32_t linear_unit_to_srgb_32(const Sk4f& l4) {
+    return Sk4f_toS32(l4);  // sRGB-encode, as store_dst<kSRGB_Dst> does (was Sk4f_toL32)
+}
+
+static Sk4f linear_unit_to_srgb_255f(const Sk4f& l4) {
+    return linear_to_srgb(l4) * Sk4f(255) + Sk4f(0.5f);
+}
+
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 static Sk4f scale_255_round(const SkPM4f& pm4) {
@@ -254,10 +266,53 @@ template <DstType D> void srcover_n(const SkXfermode::PM4fState& state, uint32_t
     }
 }
 
-template <DstType D> void srcover_1(const SkXfermode::PM4fState& state, uint32_t dst[],
-                                    const SkPM4f& src, int count, const SkAlpha aa[]) {
+static void srcover_linear_dst_1(const SkXfermode::PM4fState& state, uint32_t dst[],
+                                 const SkPM4f& src, int count, const SkAlpha aa[]) {
+    Sk4f s4 = Sk4f::Load(src.fVec);
+    Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
+
+    if (aa) {
+        for (int i = 0; i < count; ++i) {
+            unsigned a = aa[i];
+            if (0 == a) {
+                continue;
+            }
+            Sk4f d4 = Sk4f_fromL32(dst[i]);
+            Sk4f r4;
+            if (a != 0xFF) {
+                const Sk4f s4_aa = scale_by_coverage(s4, a);  // s4 must stay unscaled for later pixels
+                r4 = s4_aa + d4 * Sk4f(1 - get_alpha(s4_aa));
+            } else {
+                r4 = s4 + d4 * dst_scale;
+            }
+            dst[i] = Sk4f_toL32(r4);
+        }
+    } else {
+        s4 = s4 * Sk4f(255) + Sk4f(0.5f);   // +0.5 to pre-bias for rounding
+        while (count >= 4) {
+            Sk4f d0 = to_4f(dst[0]);
+            Sk4f d1 = to_4f(dst[1]);
+            Sk4f d2 = to_4f(dst[2]);
+            Sk4f d3 = to_4f(dst[3]);
+            Sk4f_ToBytes((uint8_t*)dst,
+                         s4 + d0 * dst_scale,
+                         s4 + d1 * dst_scale,
+                         s4 + d2 * dst_scale,
+                         s4 + d3 * dst_scale);
+            dst += 4;
+            count -= 4;
+        }
+        for (int i = 0; i < count; ++i) {
+            Sk4f d4 = to_4f(dst[i]);
+            dst[i] = to_4b(s4 + d4 * dst_scale);
+        }
+    }
+}
+
+static void srcover_srgb_dst_1(const SkXfermode::PM4fState& state, uint32_t dst[],
+                               const SkPM4f& src, int count, const SkAlpha aa[]) {
     Sk4f s4 = Sk4f::Load(src.fVec);
-    Sk4f scale = Sk4f(1 - get_alpha(s4));
+    Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
 
     if (aa) {
         for (int i = 0; i < count; ++i) {
@@ -265,30 +320,42 @@ template <DstType D> void srcover_1(const SkXfermode::PM4fState& state, uint32_t
             if (0 == a) {
                 continue;
             }
-            Sk4f d4 = load_dst<D>(dst[i]);
+            Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
             Sk4f r4;
             if (a != 0xFF) {
-                s4 = scale_by_coverage(s4, a);
-                r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));
+                const Sk4f s4_aa = scale_by_coverage(s4, a);  // s4 must stay unscaled for later pixels
+                r4 = s4_aa + d4 * Sk4f(1 - get_alpha(s4_aa));
             } else {
-                r4 = s4 + d4 * scale;
+                r4 = s4 + d4 * dst_scale;
             }
-            dst[i] = store_dst<D>(r4);
+            dst[i] = linear_unit_to_srgb_32(r4);
         }
     } else {
+        while (count >= 4) {
+            Sk4f d0 = srgb_4b_to_linear_unit(dst[0]);
+            Sk4f d1 = srgb_4b_to_linear_unit(dst[1]);
+            Sk4f d2 = srgb_4b_to_linear_unit(dst[2]);
+            Sk4f d3 = srgb_4b_to_linear_unit(dst[3]);
+            Sk4f_ToBytes((uint8_t*)dst,
+                         linear_unit_to_srgb_255f(s4 + d0 * dst_scale),
+                         linear_unit_to_srgb_255f(s4 + d1 * dst_scale),
+                         linear_unit_to_srgb_255f(s4 + d2 * dst_scale),
+                         linear_unit_to_srgb_255f(s4 + d3 * dst_scale));
+            dst += 4;
+            count -= 4;
+        }
         for (int i = 0; i < count; ++i) {
-            Sk4f d4 = load_dst<D>(dst[i]);
-            Sk4f r4 = s4 + d4 * scale;
-            dst[i] = store_dst<D>(r4);
+            Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
+            dst[i] = to_4b(linear_unit_to_srgb_255f(s4 + d4 * dst_scale));
         }
     }
 }
 
 const XferProcPair gProcs_SrcOver[] = {
-    { srcover_1<kLinear_Dst>, srcover_n<kLinear_Dst> }, // linear alpha
-    { src_1<kLinear_Dst>,     src_n<kLinear_Dst>     }, // linear opaque [ we are src-mode ]
-    { srcover_1<kSRGB_Dst>,   srcover_n<kSRGB_Dst>   }, // srgb alpha
-    { src_1<kSRGB_Dst>,       src_n<kSRGB_Dst>       }, // srgb opaque [ we are src-mode ]
+    { srcover_linear_dst_1, srcover_n<kLinear_Dst> },   // linear alpha
+    { src_1<kLinear_Dst>,   src_n<kLinear_Dst>     },   // linear opaque [ we are src-mode ]
+    { srcover_srgb_dst_1,   srcover_n<kSRGB_Dst>   },   // srgb alpha
+    { src_1<kSRGB_Dst>,     src_n<kSRGB_Dst>       },   // srgb opaque [ we are src-mode ]
 };
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
-- 
cgit v1.2.3