From b667fe2f67a072da74d7a7da32cae4f06a2f0ee4 Mon Sep 17 00:00:00 2001 From: Matt Sarett Date: Wed, 22 Mar 2017 09:20:02 -0400 Subject: Small downsample_3_3 optimization Just a little less math... mipmap_build_2047x2047_0_gamma Before: 15.8ms After: 13.4ms Hard to see much difference on the gamma correct mips, which are more dominated by load time. BUG=skia: Change-Id: I978cbc85a7d75cfcca2d5cd3fbc75e93413782f2 Reviewed-on: https://skia-review.googlesource.com/9988 Reviewed-by: Brian Osman Commit-Queue: Matt Sarett --- bench/MipMapBench.cpp | 3 +++ src/core/SkMipMap.cpp | 43 ++++++++++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/bench/MipMapBench.cpp b/bench/MipMapBench.cpp index 1ffd624fee..cc5c8aefa3 100644 --- a/bench/MipMapBench.cpp +++ b/bench/MipMapBench.cpp @@ -68,3 +68,6 @@ DEF_BENCH( return new MipMapBench(511, 511, SkDestinationSurfaceColorMode::kLega DEF_BENCH( return new MipMapBench(2048, 2048, SkDestinationSurfaceColorMode::kLegacy); ) DEF_BENCH( return new MipMapBench(2048, 2048, SkDestinationSurfaceColorMode::kGammaAndColorSpaceAware); ) +DEF_BENCH( return new MipMapBench(2047, 2047, SkDestinationSurfaceColorMode::kLegacy); ) +DEF_BENCH( return new MipMapBench(2047, 2047, + SkDestinationSurfaceColorMode::kGammaAndColorSpaceAware); ) diff --git a/src/core/SkMipMap.cpp b/src/core/SkMipMap.cpp index 7a6897e1d9..03341c60c8 100644 --- a/src/core/SkMipMap.cpp +++ b/src/core/SkMipMap.cpp @@ -266,25 +266,34 @@ template <typename F> void downsample_3_3(void* dst, const void* src, size_t src auto p2 = (const typename F::Type*)((const char*)p1 + srcRB); auto d = static_cast<typename F::Type*>(dst); - auto c02 = F::Expand(p0[0]); - auto c12 = F::Expand(p1[0]); - auto c22 = F::Expand(p2[0]); + // Given pixels: + // a0 b0 c0 d0 e0 ... + // a1 b1 c1 d1 e1 ... + // a2 b2 c2 d2 e2 ... + // We want: + // (a0 + 2*b0 + c0 + 2*a1 + 4*b1 + 2*c1 + a2 + 2*b2 + c2) / 16 + // (c0 + 2*d0 + e0 + 2*c1 + 4*d1 + 2*e1 + c2 + 2*d2 + e2) / 16 + // ... 
+ + auto c0 = F::Expand(p0[0]); + auto c1 = F::Expand(p1[0]); + auto c2 = F::Expand(p2[0]); + auto c = add_121(c0, c1, c2); for (int i = 0; i < count; ++i) { - auto c00 = c02; - auto c01 = F::Expand(p0[1]); - c02 = F::Expand(p0[2]); - auto c10 = c12; - auto c11 = F::Expand(p1[1]); - c12 = F::Expand(p1[2]); - auto c20 = c22; - auto c21 = F::Expand(p2[1]); - c22 = F::Expand(p2[2]); + auto a = c; + + auto b0 = F::Expand(p0[1]); + auto b1 = F::Expand(p1[1]); + auto b2 = F::Expand(p2[1]); + auto b = shift_left(add_121(b0, b1, b2), 1); + + c0 = F::Expand(p0[2]); + c1 = F::Expand(p1[2]); + c2 = F::Expand(p2[2]); + c = add_121(c0, c1, c2); - auto c = - add_121(c00, c01, c02) + - shift_left(add_121(c10, c11, c12), 1) + - add_121(c20, c21, c22); - d[i] = F::Compact(shift_right(c, 4)); + auto sum = a + b + c; + d[i] = F::Compact(shift_right(sum, 4)); p0 += 2; p1 += 2; p2 += 2; -- cgit v1.2.3