diff options
author | Herb Derby <herb@google.com> | 2017-07-12 13:01:35 -0400 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-07-12 20:36:38 +0000 |
commit | d4b2c537d058ad4cb890ba116d00aa86c3416c08 (patch) | |
tree | 7c80a46e853448f5390df2b7d80dc5fe927ff557 /src/core/SkMaskBlurFilter.cpp | |
parent | 80a82dff82d11c229d33e87453e8510cbd1603b9 (diff) |
Experimental blur code with 32 bit fix.
This uses a new method of blurring that runs the three
passes of the box filter in a single pass. This implementation
currently only does 1x1 pixel at a time, but it should be simple
to expand to 4x4 pixels at a time.
On the blur_10_normal_high_quality benchmark, the new is 7% faster
than the old code. For the blur_100.50_normal_high_quality
benchmark, the new code is 11% slower.
Bug: skia:
Change-Id: I847270906b0ceac1dfbf43ab5446756689ef660f
Reviewed-on: https://skia-review.googlesource.com/22700
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Herb Derby <herb@google.com>
Diffstat (limited to 'src/core/SkMaskBlurFilter.cpp')
-rw-r--r-- | src/core/SkMaskBlurFilter.cpp | 267 |
1 files changed, 267 insertions, 0 deletions
diff --git a/src/core/SkMaskBlurFilter.cpp b/src/core/SkMaskBlurFilter.cpp new file mode 100644 index 0000000000..0f0286354c --- /dev/null +++ b/src/core/SkMaskBlurFilter.cpp @@ -0,0 +1,267 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "SkMaskBlurFilter.h" + +#include <cmath> + +#include "SkMakeUnique.h" + +static const double kPi = 3.14159265358979323846264338327950288; + +static uint64_t weight_from_diameter(uint32_t d) { + uint64_t d2 = d * d; + uint64_t d3 = d2 * d; + if ((d&1) == 0) { + // d * d * (d + 1); + return d3 + d2; + } + + return d3; +} + +static uint32_t filter_window(double sigma) { + auto possibleWindow = static_cast<uint32_t>(floor(sigma * 3 * sqrt(2*kPi)/4 + 0.5)); + return std::max(1u, possibleWindow); +} + +SkMaskBlurFilter::FilterInfo::FilterInfo(double sigma) + : fFilterWindow{filter_window(sigma)} + , fScaledWeight{(static_cast<uint64_t>(1) << 32) / weight_from_diameter(fFilterWindow)} {} + +uint64_t SkMaskBlurFilter::FilterInfo::weight() const { + return weight_from_diameter(fFilterWindow); + +} +uint32_t SkMaskBlurFilter::FilterInfo::borderSize() const { + if ((fFilterWindow&1) == 0) { + return 3 * (fFilterWindow / 2) - 1; + } + return 3 * (fFilterWindow / 2); +} + +size_t SkMaskBlurFilter::FilterInfo::diameter(uint8_t pass) const { + SkASSERT(pass <= 2); + + if ((fFilterWindow&1) == 0) { + // Handle even case. + switch (pass) { + case 0: return fFilterWindow; + case 1: return fFilterWindow; + case 2: return fFilterWindow+1; + } + } + + return fFilterWindow; +} + +uint64_t SkMaskBlurFilter::FilterInfo::scaledWeight() const { + return fScaledWeight; +} + +SkMaskBlurFilter::SkMaskBlurFilter(double sigmaW, double sigmaH) + : fInfoW{sigmaW}, fInfoH{sigmaH} + , fBuffer0{skstd::make_unique_default<uint32_t[]>(bufferSize(0))} + , fBuffer1{skstd::make_unique_default<uint32_t[]>(bufferSize(1))} + , fBuffer2{skstd::make_unique_default<uint32_t[]>(bufferSize(2))} { +} + +SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { + + uint64_t weightW = fInfoW.weight(); + uint64_t weightH = fInfoH.weight(); + + size_t borderW = fInfoW.borderSize(); + size_t borderH = fInfoH.borderSize(); + + size_t srcW = src.fBounds.width(); + size_t srcH = src.fBounds.height(); + + size_t dstW = srcW + 2 * borderW; + size_t dstH = srcH + 2 * borderH; + + dst->fBounds.set(0, 0, dstW, dstH); + dst->fBounds.offset(src.fBounds.x(), src.fBounds.y()); + dst->fBounds.offset(-SkTo<int32_t>(borderW), -SkTo<int32_t>(borderH)); + + dst->fImage = nullptr; + dst->fRowBytes = dstW; + dst->fFormat = SkMask::kA8_Format; + + if (src.fImage == nullptr) { + return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)}; + } + + dst->fImage = SkMask::AllocImage(dstW * dstH); + + if (weightW > 1 && weightH > 1) { + // Blur both directions. + size_t tmpW = srcH; + size_t tmpH = dstW; + auto tmp = skstd::make_unique_default<uint8_t[]>(tmpW * tmpH); + + // Blur horizontally, and transpose. + for (size_t y = 0; y < srcH; y++) { + auto srcStart = &src.fImage[y * src.fRowBytes]; + auto tmpStart = &tmp[y]; + this->blurOneScan(fInfoW, + srcStart, 1, srcStart + srcW, + tmpStart, tmpW, tmpStart + tmpW * tmpH); + } + + // Blur vertically (scan in memory order because of the transposition), + // and transpose back to the original orientation. + for (size_t y = 0; y < tmpH; y++) { + auto tmpStart = &tmp[y * tmpW]; + auto dstStart = &dst->fImage[y]; + this->blurOneScan(fInfoH, + tmpStart, 1, tmpStart + tmpW, + dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH); + } + } else if (weightW > 1) { + // Blur only horizontally. + + for (size_t y = 0; y < srcH; y++) { + auto srcStart = &src.fImage[y * src.fRowBytes]; + auto dstStart = &dst->fImage[y * dst->fRowBytes]; + this->blurOneScan(fInfoW, + srcStart, 1, srcStart + srcW, + dstStart, 1, dstStart + dstW); + } + } else if (weightH > 1) { + // Blur only vertically. + + for (size_t x = 0; x < srcW; x++) { + auto srcStart = &src.fImage[x]; + auto srcEnd = &src.fImage[src.fRowBytes * srcH]; + auto dstStart = &dst->fImage[x]; + auto dstEnd = &dst->fImage[dst->fRowBytes * dstH]; + this->blurOneScan(fInfoH, + srcStart, src.fRowBytes, srcEnd, + dstStart, dst->fRowBytes, dstEnd); + } + } else { + // Copy to dst. No Blur. + + for (size_t y = 0; y < srcH; y++) { + std::memcpy(&dst->fImage[y * dst->fRowBytes], &src.fImage[y * src.fRowBytes], dstW); + } + } + + return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)}; +} + +size_t SkMaskBlurFilter::bufferSize(uint8_t bufferPass) const { + return std::max(fInfoW.diameter(bufferPass), fInfoH.diameter(bufferPass)) - 1; +} + +// Blur one horizontal scan into the dst. +void SkMaskBlurFilter::blurOneScan( + FilterInfo info, + const uint8_t* src, size_t srcStride, const uint8_t* srcEnd, + uint8_t* dst, size_t dstStride, uint8_t* dstEnd) const { + + auto buffer0Begin = &fBuffer0[0]; + auto buffer1Begin = &fBuffer1[0]; + auto buffer2Begin = &fBuffer2[0]; + + auto buffer0Cursor = buffer0Begin; + auto buffer1Cursor = buffer1Begin; + auto buffer2Cursor = buffer2Begin; + + auto buffer0End = &fBuffer0[0] + info.diameter(0) - 1; + auto buffer1End = &fBuffer1[0] + info.diameter(1) - 1; + auto buffer2End = &fBuffer2[0] + info.diameter(2) - 1; + + std::memset(&fBuffer0[0], 0, (buffer0End - buffer0Begin) * sizeof(fBuffer0[0])); + std::memset(&fBuffer1[0], 0, (buffer1End - buffer1Begin) * sizeof(fBuffer1[0])); + std::memset(&fBuffer2[0], 0, (buffer2End - buffer2Begin) * sizeof(fBuffer2[0])); + + uint32_t sum0 = 0; + uint32_t sum1 = 0; + uint32_t sum2 = 0; + + const uint64_t half = static_cast<uint64_t>(1) << 31; + + // Consume the source generating pixels. + for (auto srcCursor = src; srcCursor < srcEnd; dst += dstStride, srcCursor += srcStride) { + uint32_t s = *srcCursor; + sum0 += s; + sum1 += sum0; + sum2 += sum1; + + *dst = SkTo<uint8_t>((info.scaledWeight() * sum2 + half) >> 32); + + sum2 -= *buffer2Cursor; + *buffer2Cursor = sum1; + buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : &fBuffer2[0]; + + sum1 -= *buffer1Cursor; + *buffer1Cursor = sum0; + buffer1Cursor = (buffer1Cursor + 1) < buffer1End ? buffer1Cursor + 1 : &fBuffer1[0]; + + sum0 -= *buffer0Cursor; + *buffer0Cursor = s; + buffer0Cursor = (buffer0Cursor + 1) < buffer0End ? buffer0Cursor + 1 : &fBuffer0[0]; + } + + // This handles the case when both ends of the box are not between [src, srcEnd), and both + // are zero at that point. + for (auto i = 0; i < static_cast<ptrdiff_t>(2 * info.borderSize()) - (srcEnd - src); i++) { + uint32_t s = 0; + sum0 += s; + sum1 += sum0; + sum2 += sum1; + + *dst = SkTo<uint8_t>((info.scaledWeight() * sum2 + half) >> 32); + + sum2 -= *buffer2Cursor; + *buffer2Cursor = sum1; + buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : &fBuffer2[0]; + + sum1 -= *buffer1Cursor; + *buffer1Cursor = sum0; + buffer1Cursor = (buffer1Cursor + 1) < buffer1End ? buffer1Cursor + 1 : &fBuffer1[0]; + + sum0 -= *buffer0Cursor; + *buffer0Cursor = s; + buffer0Cursor = (buffer0Cursor + 1) < buffer0End ? buffer0Cursor + 1 : &fBuffer0[0]; + dst += dstStride; + } + + // Starting from the right, fill in the rest of the buffer. + std::memset(&fBuffer0[0], 0, (buffer0End - &fBuffer0[0]) * sizeof(fBuffer0[0])); + std::memset(&fBuffer1[0], 0, (buffer1End - &fBuffer1[0]) * sizeof(fBuffer1[0])); + std::memset(&fBuffer2[0], 0, (buffer2End - &fBuffer2[0]) * sizeof(fBuffer2[0])); + + sum0 = sum1 = sum2 = 0; + + uint8_t* dstCursor = dstEnd; + const uint8_t* srcCursor = srcEnd; + do { + dstCursor -= dstStride; + srcCursor -= srcStride; + uint32_t s = *srcCursor; + sum0 += s; + sum1 += sum0; + sum2 += sum1; + + *dstCursor = SkTo<uint8_t>((info.scaledWeight() * sum2 + half) >> 32); + + sum2 -= *buffer2Cursor; + *buffer2Cursor = sum1; + buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : &fBuffer2[0]; + + sum1 -= *buffer1Cursor; + *buffer1Cursor = sum0; + buffer1Cursor = (buffer1Cursor + 1) < buffer1End ? buffer1Cursor + 1 : &fBuffer1[0]; + + sum0 -= *buffer0Cursor; + *buffer0Cursor = s; + buffer0Cursor = (buffer0Cursor + 1) < buffer0End ? buffer0Cursor + 1 : &fBuffer0[0]; + } while (dstCursor > dst); +} |