diff options
author | reed <reed@google.com> | 2016-01-16 09:23:48 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-01-16 09:23:49 -0800 |
commit | 326253ef22d39e72a729e8069e54b34ade72ad1b (patch) | |
tree | 28d584d7b94316686ea436bd59502e4109e7e740 /src | |
parent | 0b64b98d8d5c86e31f7c2a14789b4d1537749020 (diff) |
Speed up mip builders
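1. Push the inner (per-pixel) loop into the filter procs, so each proc is called once per destination row instead of once per destination pixel.
2. For the 3x3 and 3x2 cases, skip the redundant horizontal read/expand: the last source column of one output pixel is the first column of the next, so its expanded value is carried over instead of being re-read.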
Some before/after timings (unfortunately there's a lot of variance), measured on a MacBook Air:
Before
9/9 MB 1 1.45ms 1.65ms 1.92ms 2.93ms 31% ▂█▂▁▁▁▂█▆▂ nonrendering mipmap_build_512x512
9/9 MB 1 1.85ms 2.33ms 2.47ms 3.69ms 28% ▃██▄▃▂▁▂▁▁ nonrendering mipmap_build_511x512
9/9 MB 1 2.15ms 2.21ms 2.37ms 3.28ms 15% █▂▂▁▁▁▁▅▁▁ nonrendering mipmap_build_512x511
9/9 MB 1 2.74ms 3.9ms 4.03ms 5.89ms 25% ▄▂▃▄█▂▁▂▇▅ nonrendering mipmap_build_511x511
After
10/10 MB 1 1.08ms 1.09ms 1.1ms 1.18ms 3% ▁▁▁▁▁▁▁█▃▁ nonrendering mipmap_build_512x512
10/10 MB 1 1.22ms 1.44ms 1.66ms 2.83ms 30% ▂▂▄▁▁▃█▅▂▁ nonrendering mipmap_build_511x512
10/10 MB 1 1.45ms 1.91ms 2.04ms 3.75ms 36% ▁▁▁▃▅█▃▂▂▂ nonrendering mipmap_build_512x511
10/10 MB 1 1.7ms 1.7ms 1.81ms 2.41ms 13% █▁▁▁▁▁▁▁▁▄ nonrendering mipmap_build_511x511
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1593073002
Review URL: https://codereview.chromium.org/1593073002
Diffstat (limited to 'src')
-rw-r--r-- | src/core/SkMipMap.cpp | 160 |
1 files changed, 87 insertions, 73 deletions
diff --git a/src/core/SkMipMap.cpp b/src/core/SkMipMap.cpp index 6e921c7942..eba50bcb4d 100644 --- a/src/core/SkMipMap.cpp +++ b/src/core/SkMipMap.cpp @@ -383,66 +383,95 @@ template <typename T> T add_121(T a, T b, T c) { // we need to sample in each dimension to produce 1 dst pixel. // -template <typename F> void downsample_2_2(void* dst, const void* src, size_t srcRB) { +template <typename F> void downsample_2_2(void* dst, const void* src, size_t srcRB, int count) { auto p0 = static_cast<const typename F::Type*>(src); auto p1 = (const typename F::Type*)((const char*)p0 + srcRB); + auto d = static_cast<typename F::Type*>(dst); - auto c00 = F::Expand(p0[0]); - auto c01 = F::Expand(p0[1]); - auto c10 = F::Expand(p1[0]); - auto c11 = F::Expand(p1[1]); + for (int i = 0; i < count; ++i) { + auto c00 = F::Expand(p0[0]); + auto c01 = F::Expand(p0[1]); + auto c10 = F::Expand(p1[0]); + auto c11 = F::Expand(p1[1]); - auto c = c00 + c10 + c01 + c11; - *(typename F::Type*)dst = F::Compact(c >> 2); + auto c = c00 + c10 + c01 + c11; + d[i] = F::Compact(c >> 2); + p0 += 2; + p1 += 2; + } } -template <typename F> void downsample_3_2(void* dst, const void* src, size_t srcRB) { +template <typename F> void downsample_3_2(void* dst, const void* src, size_t srcRB, int count) { + SkASSERT(count > 0); auto p0 = static_cast<const typename F::Type*>(src); auto p1 = (const typename F::Type*)((const char*)p0 + srcRB); - - auto c00 = F::Expand(p0[0]); - auto c01 = F::Expand(p0[1]); - auto c02 = F::Expand(p0[2]); - auto c10 = F::Expand(p1[0]); - auto c11 = F::Expand(p1[1]); - auto c12 = F::Expand(p1[2]); - - auto c = add_121(c00, c01, c02) + add_121(c10, c11, c12); - *(typename F::Type*)dst = F::Compact(c >> 3); + auto d = static_cast<typename F::Type*>(dst); + + auto c02 = F::Expand(p0[0]); + auto c12 = F::Expand(p1[0]); + for (int i = 0; i < count; ++i) { + auto c00 = c02; + auto c01 = F::Expand(p0[1]); + c02 = F::Expand(p0[2]); + auto c10 = c12; + auto c11 = F::Expand(p1[1]); + c12 = 
F::Expand(p1[2]); + + auto c = add_121(c00, c01, c02) + add_121(c10, c11, c12); + d[i] = F::Compact(c >> 3); + p0 += 2; + p1 += 2; + } } -template <typename F> void downsample_2_3(void* dst, const void* src, size_t srcRB) { +template <typename F> void downsample_2_3(void* dst, const void* src, size_t srcRB, int count) { auto p0 = static_cast<const typename F::Type*>(src); auto p1 = (const typename F::Type*)((const char*)p0 + srcRB); auto p2 = (const typename F::Type*)((const char*)p1 + srcRB); - - auto c00 = F::Expand(p0[0]); - auto c01 = F::Expand(p0[1]); - auto c10 = F::Expand(p1[0]); - auto c11 = F::Expand(p1[1]); - auto c20 = F::Expand(p2[0]); - auto c21 = F::Expand(p2[1]); - auto c = add_121(c00, c10, c20) + add_121(c01, c11, c21); - *(typename F::Type*)dst = F::Compact(c >> 3); + auto d = static_cast<typename F::Type*>(dst); + + for (int i = 0; i < count; ++i) { + auto c00 = F::Expand(p0[0]); + auto c01 = F::Expand(p0[1]); + auto c10 = F::Expand(p1[0]); + auto c11 = F::Expand(p1[1]); + auto c20 = F::Expand(p2[0]); + auto c21 = F::Expand(p2[1]); + + auto c = add_121(c00, c10, c20) + add_121(c01, c11, c21); + d[i] = F::Compact(c >> 3); + p0 += 2; + p1 += 2; + p2 += 2; + } } -template <typename F> void downsample_3_3(void* dst, const void* src, size_t srcRB) { +template <typename F> void downsample_3_3(void* dst, const void* src, size_t srcRB, int count) { auto p0 = static_cast<const typename F::Type*>(src); auto p1 = (const typename F::Type*)((const char*)p0 + srcRB); auto p2 = (const typename F::Type*)((const char*)p1 + srcRB); - - auto c00 = F::Expand(p0[0]); - auto c01 = F::Expand(p0[1]); - auto c02 = F::Expand(p0[2]); - auto c10 = F::Expand(p1[0]); - auto c11 = F::Expand(p1[1]); - auto c12 = F::Expand(p1[2]); - auto c20 = F::Expand(p2[0]); - auto c21 = F::Expand(p2[1]); - auto c22 = F::Expand(p2[2]); - - auto c = add_121(c00, c01, c02) + (add_121(c10, c11, c12) << 1) + add_121(c20, c21, c22); - *(typename F::Type*)dst = F::Compact(c >> 4); + auto d = 
static_cast<typename F::Type*>(dst); + + auto c02 = F::Expand(p0[0]); + auto c12 = F::Expand(p1[0]); + auto c22 = F::Expand(p2[0]); + for (int i = 0; i < count; ++i) { + auto c00 = c02; + auto c01 = F::Expand(p0[1]); + c02 = F::Expand(p0[2]); + auto c10 = c12; + auto c11 = F::Expand(p1[1]); + c12 = F::Expand(p1[2]); + auto c20 = c22; + auto c21 = F::Expand(p2[1]); + c22 = F::Expand(p2[2]); + + auto c = add_121(c00, c01, c02) + (add_121(c10, c11, c12) << 1) + add_121(c20, c21, c22); + d[i] = F::Compact(c >> 4); + p0 += 2; + p1 += 2; + p2 += 2; + } } /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -459,7 +488,7 @@ size_t SkMipMap::AllocLevelsSize(int levelCount, size_t pixelSize) { } SkMipMap* SkMipMap::Build(const SkBitmap& src, SkDiscardableFactoryProc fact) { - typedef void FilterProc(void*, const void* srcPtr, size_t srcRB); + typedef void FilterProc(void*, const void* srcPtr, size_t srcRB, int count); FilterProc* proc_2_2 = nullptr; FilterProc* proc_2_3 = nullptr; @@ -559,9 +588,21 @@ SkMipMap* SkMipMap::Build(const SkBitmap& src, SkDiscardableFactoryProc fact) { uint32_t rowBytes; SkPixmap srcPM(srcPixmap); - int prevW = width; - int prevH = height; for (int i = 0; i < countLevels; ++i) { + FilterProc* proc; + if (height & 1) { // src-height is 3 + if (width & 1) { // src-width is 3 + proc = proc_3_3; + } else { // src-width is 2 + proc = proc_2_3; + } + } else { // src-height is 2 + if (width & 1) { // src-width is 3 + proc = proc_3_2; + } else { // src-width is 2 + proc = proc_2_2; + } + } width >>= 1; height >>= 1; rowBytes = SkToU32(SkColorTypeMinRowBytes(ct, width)); @@ -574,44 +615,17 @@ SkMipMap* SkMipMap::Build(const SkBitmap& src, SkDiscardableFactoryProc fact) { SkPixmap dstPM(SkImageInfo::Make(width, height, ct, at), addr, rowBytes); - const size_t pixelSize = srcPM.info().bytesPerPixel(); - const void* srcBasePtr = srcPM.addr(); void* dstBasePtr = dstPM.writable_addr(); - FilterProc* proc; - if 
(prevH & 1) { // src-height is 3 - if (prevW & 1) { // src-width is 3 - proc = proc_3_3; - } else { // src-width is 2 - proc = proc_2_3; - } - } else { // src-height is 2 - if (prevW & 1) { // src-width is 3 - proc = proc_3_2; - } else { // src-width is 2 - proc = proc_2_2; - } - } - const size_t srcRB = srcPM.rowBytes(); for (int y = 0; y < height; y++) { - const void* srcPtr = srcBasePtr; - void* dstPtr = dstBasePtr; - - for (int x = 0; x < width; x++) { - proc(dstPtr, srcPtr, srcRB); - srcPtr = (char*)srcPtr + pixelSize * 2; - dstPtr = (char*)dstPtr + pixelSize; - } - + proc(dstBasePtr, srcBasePtr, srcRB, width); srcBasePtr = (char*)srcBasePtr + srcRB * 2; // jump two rows dstBasePtr = (char*)dstBasePtr + dstPM.rowBytes(); } srcPM = dstPM; addr += height * rowBytes; - prevW = width; - prevH = height; } SkASSERT(addr == baseAddr + size); |