diff options
author | Herb Derby <herb@google.com> | 2017-07-12 13:01:35 -0400 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-07-12 20:36:38 +0000 |
commit | d4b2c537d058ad4cb890ba116d00aa86c3416c08 (patch) | |
tree | 7c80a46e853448f5390df2b7d80dc5fe927ff557 | |
parent | 80a82dff82d11c229d33e87453e8510cbd1603b9 (diff) |
Experimental blur code with 32-bit fix.
This uses a new method of blurring that runs the three
passes of the box filter in a single pass. This implementation
currently processes only one pixel (1x1) at a time, but it should be
simple to expand it to process 4x4 pixels at a time.
On the blur_10_normal_high_quality benchmark, the new code is 7% faster
than the old code. On the blur_100.50_normal_high_quality
benchmark, the new code is 11% slower.
Bug: skia:
Change-Id: I847270906b0ceac1dfbf43ab5446756689ef660f
Reviewed-on: https://skia-review.googlesource.com/22700
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Herb Derby <herb@google.com>
-rw-r--r-- | gn/core.gni | 2 | ||||
-rw-r--r-- | src/core/SkMakeUnique.h | 5 | ||||
-rw-r--r-- | src/core/SkMaskBlurFilter.cpp | 267 | ||||
-rw-r--r-- | src/core/SkMaskBlurFilter.h | 67 | ||||
-rw-r--r-- | src/effects/SkBlurMask.cpp | 96 |
5 files changed, 399 insertions, 38 deletions
diff --git a/gn/core.gni b/gn/core.gni index 187a6de902..39a80fef85 100644 --- a/gn/core.gni +++ b/gn/core.gni @@ -178,6 +178,8 @@ skia_core_sources = [ "$_src/core/SkMD5.h", "$_src/core/SkMallocPixelRef.cpp", "$_src/core/SkMask.cpp", + "$_src/core/SkMaskBlurFilter.h", + "$_src/core/SkMaskBlurFilter.cpp", "$_src/core/SkMaskCache.cpp", "$_src/core/SkMaskFilter.cpp", "$_src/core/SkMaskGamma.cpp", diff --git a/src/core/SkMakeUnique.h b/src/core/SkMakeUnique.h index 188eb05ff4..860ea2e8a7 100644 --- a/src/core/SkMakeUnique.h +++ b/src/core/SkMakeUnique.h @@ -18,6 +18,11 @@ std::unique_ptr<T> make_unique(Args&&... args) { return std::unique_ptr<T>(new T(std::forward<Args>(args)...)); } +template<typename T> +std::unique_ptr<T> make_unique_default(size_t n) { + return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]); +} + } #endif // SkMakeUnique_DEFINED diff --git a/src/core/SkMaskBlurFilter.cpp b/src/core/SkMaskBlurFilter.cpp new file mode 100644 index 0000000000..0f0286354c --- /dev/null +++ b/src/core/SkMaskBlurFilter.cpp @@ -0,0 +1,267 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#include "SkMaskBlurFilter.h" + +#include <cmath> + +#include "SkMakeUnique.h" + +static const double kPi = 3.14159265358979323846264338327950288; + +static uint64_t weight_from_diameter(uint32_t d) { + uint64_t d2 = d * d; + uint64_t d3 = d2 * d; + if ((d&1) == 0) { + // d * d * (d + 1); + return d3 + d2; + } + + return d3; +} + +static uint32_t filter_window(double sigma) { + auto possibleWindow = static_cast<uint32_t>(floor(sigma * 3 * sqrt(2*kPi)/4 + 0.5)); + return std::max(1u, possibleWindow); +} + +SkMaskBlurFilter::FilterInfo::FilterInfo(double sigma) + : fFilterWindow{filter_window(sigma)} + , fScaledWeight{(static_cast<uint64_t>(1) << 32) / weight_from_diameter(fFilterWindow)} {} + +uint64_t SkMaskBlurFilter::FilterInfo::weight() const { + return weight_from_diameter(fFilterWindow); + +} +uint32_t SkMaskBlurFilter::FilterInfo::borderSize() const { + if ((fFilterWindow&1) == 0) { + return 3 * (fFilterWindow / 2) - 1; + } + return 3 * (fFilterWindow / 2); +} + +size_t SkMaskBlurFilter::FilterInfo::diameter(uint8_t pass) const { + SkASSERT(pass <= 2); + + if ((fFilterWindow&1) == 0) { + // Handle even case. 
+ switch (pass) { + case 0: return fFilterWindow; + case 1: return fFilterWindow; + case 2: return fFilterWindow+1; + } + } + + return fFilterWindow; +} + +uint64_t SkMaskBlurFilter::FilterInfo::scaledWeight() const { + return fScaledWeight; +} + +SkMaskBlurFilter::SkMaskBlurFilter(double sigmaW, double sigmaH) + : fInfoW{sigmaW}, fInfoH{sigmaH} + , fBuffer0{skstd::make_unique_default<uint32_t[]>(bufferSize(0))} + , fBuffer1{skstd::make_unique_default<uint32_t[]>(bufferSize(1))} + , fBuffer2{skstd::make_unique_default<uint32_t[]>(bufferSize(2))} { +} + +SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { + + uint64_t weightW = fInfoW.weight(); + uint64_t weightH = fInfoH.weight(); + + size_t borderW = fInfoW.borderSize(); + size_t borderH = fInfoH.borderSize(); + + size_t srcW = src.fBounds.width(); + size_t srcH = src.fBounds.height(); + + size_t dstW = srcW + 2 * borderW; + size_t dstH = srcH + 2 * borderH; + + dst->fBounds.set(0, 0, dstW, dstH); + dst->fBounds.offset(src.fBounds.x(), src.fBounds.y()); + dst->fBounds.offset(-SkTo<int32_t>(borderW), -SkTo<int32_t>(borderH)); + + dst->fImage = nullptr; + dst->fRowBytes = dstW; + dst->fFormat = SkMask::kA8_Format; + + if (src.fImage == nullptr) { + return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)}; + } + + dst->fImage = SkMask::AllocImage(dstW * dstH); + + if (weightW > 1 && weightH > 1) { + // Blur both directions. + size_t tmpW = srcH; + size_t tmpH = dstW; + auto tmp = skstd::make_unique_default<uint8_t[]>(tmpW * tmpH); + + // Blur horizontally, and transpose. + for (size_t y = 0; y < srcH; y++) { + auto srcStart = &src.fImage[y * src.fRowBytes]; + auto tmpStart = &tmp[y]; + this->blurOneScan(fInfoW, + srcStart, 1, srcStart + srcW, + tmpStart, tmpW, tmpStart + tmpW * tmpH); + } + + // Blur vertically (scan in memory order because of the transposition), + // and transpose back to the original orientation. 
+ for (size_t y = 0; y < tmpH; y++) { + auto tmpStart = &tmp[y * tmpW]; + auto dstStart = &dst->fImage[y]; + this->blurOneScan(fInfoH, + tmpStart, 1, tmpStart + tmpW, + dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH); + } + } else if (weightW > 1) { + // Blur only horizontally. + + for (size_t y = 0; y < srcH; y++) { + auto srcStart = &src.fImage[y * src.fRowBytes]; + auto dstStart = &dst->fImage[y * dst->fRowBytes]; + this->blurOneScan(fInfoW, + srcStart, 1, srcStart + srcW, + dstStart, 1, dstStart + dstW); + } + } else if (weightH > 1) { + // Blur only vertically. + + for (size_t x = 0; x < srcW; x++) { + auto srcStart = &src.fImage[x]; + auto srcEnd = &src.fImage[src.fRowBytes * srcH]; + auto dstStart = &dst->fImage[x]; + auto dstEnd = &dst->fImage[dst->fRowBytes * dstH]; + this->blurOneScan(fInfoH, + srcStart, src.fRowBytes, srcEnd, + dstStart, dst->fRowBytes, dstEnd); + } + } else { + // Copy to dst. No Blur. + + for (size_t y = 0; y < srcH; y++) { + std::memcpy(&dst->fImage[y * dst->fRowBytes], &src.fImage[y * src.fRowBytes], dstW); + } + } + + return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)}; +} + +size_t SkMaskBlurFilter::bufferSize(uint8_t bufferPass) const { + return std::max(fInfoW.diameter(bufferPass), fInfoH.diameter(bufferPass)) - 1; +} + +// Blur one horizontal scan into the dst. 
+void SkMaskBlurFilter::blurOneScan( + FilterInfo info, + const uint8_t* src, size_t srcStride, const uint8_t* srcEnd, + uint8_t* dst, size_t dstStride, uint8_t* dstEnd) const { + + auto buffer0Begin = &fBuffer0[0]; + auto buffer1Begin = &fBuffer1[0]; + auto buffer2Begin = &fBuffer2[0]; + + auto buffer0Cursor = buffer0Begin; + auto buffer1Cursor = buffer1Begin; + auto buffer2Cursor = buffer2Begin; + + auto buffer0End = &fBuffer0[0] + info.diameter(0) - 1; + auto buffer1End = &fBuffer1[0] + info.diameter(1) - 1; + auto buffer2End = &fBuffer2[0] + info.diameter(2) - 1; + + std::memset(&fBuffer0[0], 0, (buffer0End - buffer0Begin) * sizeof(fBuffer0[0])); + std::memset(&fBuffer1[0], 0, (buffer1End - buffer1Begin) * sizeof(fBuffer1[0])); + std::memset(&fBuffer2[0], 0, (buffer2End - buffer2Begin) * sizeof(fBuffer2[0])); + + uint32_t sum0 = 0; + uint32_t sum1 = 0; + uint32_t sum2 = 0; + + const uint64_t half = static_cast<uint64_t>(1) << 31; + + // Consume the source generating pixels. + for (auto srcCursor = src; srcCursor < srcEnd; dst += dstStride, srcCursor += srcStride) { + uint32_t s = *srcCursor; + sum0 += s; + sum1 += sum0; + sum2 += sum1; + + *dst = SkTo<uint8_t>((info.scaledWeight() * sum2 + half) >> 32); + + sum2 -= *buffer2Cursor; + *buffer2Cursor = sum1; + buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : &fBuffer2[0]; + + sum1 -= *buffer1Cursor; + *buffer1Cursor = sum0; + buffer1Cursor = (buffer1Cursor + 1) < buffer1End ? buffer1Cursor + 1 : &fBuffer1[0]; + + sum0 -= *buffer0Cursor; + *buffer0Cursor = s; + buffer0Cursor = (buffer0Cursor + 1) < buffer0End ? buffer0Cursor + 1 : &fBuffer0[0]; + } + + // This handles the case when both ends of the box are not between [src, srcEnd), and both + // are zero at that point. 
+ for (auto i = 0; i < static_cast<ptrdiff_t>(2 * info.borderSize()) - (srcEnd - src); i++) { + uint32_t s = 0; + sum0 += s; + sum1 += sum0; + sum2 += sum1; + + *dst = SkTo<uint8_t>((info.scaledWeight() * sum2 + half) >> 32); + + sum2 -= *buffer2Cursor; + *buffer2Cursor = sum1; + buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : &fBuffer2[0]; + + sum1 -= *buffer1Cursor; + *buffer1Cursor = sum0; + buffer1Cursor = (buffer1Cursor + 1) < buffer1End ? buffer1Cursor + 1 : &fBuffer1[0]; + + sum0 -= *buffer0Cursor; + *buffer0Cursor = s; + buffer0Cursor = (buffer0Cursor + 1) < buffer0End ? buffer0Cursor + 1 : &fBuffer0[0]; + dst += dstStride; + } + + // Starting from the right, fill in the rest of the buffer. + std::memset(&fBuffer0[0], 0, (buffer0End - &fBuffer0[0]) * sizeof(fBuffer0[0])); + std::memset(&fBuffer1[0], 0, (buffer1End - &fBuffer1[0]) * sizeof(fBuffer1[0])); + std::memset(&fBuffer2[0], 0, (buffer2End - &fBuffer2[0]) * sizeof(fBuffer2[0])); + + sum0 = sum1 = sum2 = 0; + + uint8_t* dstCursor = dstEnd; + const uint8_t* srcCursor = srcEnd; + do { + dstCursor -= dstStride; + srcCursor -= srcStride; + uint32_t s = *srcCursor; + sum0 += s; + sum1 += sum0; + sum2 += sum1; + + *dstCursor = SkTo<uint8_t>((info.scaledWeight() * sum2 + half) >> 32); + + sum2 -= *buffer2Cursor; + *buffer2Cursor = sum1; + buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : &fBuffer2[0]; + + sum1 -= *buffer1Cursor; + *buffer1Cursor = sum0; + buffer1Cursor = (buffer1Cursor + 1) < buffer1End ? buffer1Cursor + 1 : &fBuffer1[0]; + + sum0 -= *buffer0Cursor; + *buffer0Cursor = s; + buffer0Cursor = (buffer0Cursor + 1) < buffer0End ? buffer0Cursor + 1 : &fBuffer0[0]; + } while (dstCursor > dst); +} diff --git a/src/core/SkMaskBlurFilter.h b/src/core/SkMaskBlurFilter.h new file mode 100644 index 0000000000..9becadca39 --- /dev/null +++ b/src/core/SkMaskBlurFilter.h @@ -0,0 +1,67 @@ +/* + * Copyright 2017 Google Inc. 
+ * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkBlurMaskFilter_DEFINED +#define SkBlurMaskFilter_DEFINED + +#include <algorithm> +#include <memory> + +#include "SkMask.h" +#include "SkTypes.h" + +// Implement a single channel Gaussian blur. The specifics for implementation are taken from: +// https://drafts.fxtf.org/filters/#feGaussianBlurElement +class SkMaskBlurFilter { +public: + // Given a filter specified by sigma, generate various quantities. + class FilterInfo { + public: + explicit FilterInfo(double sigma); + + // The final weight to divide by given a box size calculated from sigma accumulated for + // all three passes. For example, if the box size is 5, then the final weight for all + // three passes is 5^3 or 125. + uint64_t weight() const; + + // The distance between the first value of the dst and the first value of the src. + uint32_t borderSize() const; + + // The size of the box filter. + size_t diameter(uint8_t) const; + + // A factor used to simulate division using multiplication and shift. + uint64_t scaledWeight() const; + + private: + const uint32_t fFilterWindow; + const uint64_t fScaledWeight; + }; + + // Create an object suitable for filtering an SkMask using a filter with width sigmaW and + // height sigmaH. + SkMaskBlurFilter(double sigmaW, double sigmaH); + + // Given a src SkMask, generate dst SkMask returning the border width and height. 
+ SkIPoint blur(const SkMask& src, SkMask* dst) const; + +private: + size_t bufferSize(uint8_t bufferPass) const; + + void blurOneScan(FilterInfo gen, + const uint8_t* src, size_t srcStride, const uint8_t* srcEnd, + uint8_t* dst, size_t dstStride, uint8_t* dstEnd) const; + + + const FilterInfo fInfoW, + fInfoH; + std::unique_ptr<uint32_t[]> fBuffer0, + fBuffer1, + fBuffer2; +}; + +#endif // SkBlurMaskFilter_DEFINED diff --git a/src/effects/SkBlurMask.cpp b/src/effects/SkBlurMask.cpp index eee16313f6..fe59ab8269 100644 --- a/src/effects/SkBlurMask.cpp +++ b/src/effects/SkBlurMask.cpp @@ -7,6 +7,7 @@ #include "SkBlurMask.h" +#include "SkMaskBlurFilter.h" #include "SkMath.h" #include "SkTemplates.h" #include "SkEndian.h" @@ -406,13 +407,7 @@ static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst, return new_width; } -static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius) -{ - *loRadius = *hiRadius = SkScalarCeilToInt(passRadius); - if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) { - *loRadius = *hiRadius - 1; - } -} + #include "SkColorPriv.h" @@ -487,6 +482,17 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src, return false; } + SkIPoint border; + +#ifdef SK_SUPPORT_LEGACY_MASK_BLUR + + auto get_adjusted_radii = [](SkScalar passRadius, int *loRadius, int *hiRadius) { + *loRadius = *hiRadius = SkScalarCeilToInt(passRadius); + if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) { + *loRadius = *hiRadius - 1; + } + }; + // Force high quality off for small radii (performance) if (!force_quality && sigma <= SkIntToScalar(2)) { quality = kLow_SkBlurQuality; @@ -496,12 +502,12 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src, if (kHigh_SkBlurQuality == quality) { // For the high quality path the 3 pass box blur kernel width is // 6*rad+1 while the full Gaussian width is 6*sigma. 
- passRadius = sigma - (1/6.0f); + passRadius = sigma - (1 / 6.0f); } else { // For the low quality path we only attempt to cover 3*sigma of the // Gaussian blur area (1.5*sigma on each side). The single pass box // blur's kernel size is 2*rad+1. - passRadius = 1.5f*sigma - 0.5f; + passRadius = 1.5f * sigma - 0.5f; } // highQuality: use three box blur passes as a cheap way @@ -522,9 +528,8 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src, int padx = passCount * rx; int pady = passCount * ry; - if (margin) { - margin->set(padx, pady); - } + border = {padx, pady}; + dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady, src.fBounds.fRight + padx, src.fBounds.fBottom + pady); @@ -538,15 +543,15 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src, return false; // too big to allocate, abort } - int sw = src.fBounds.width(); - int sh = src.fBounds.height(); - const uint8_t* sp = src.fImage; - uint8_t* dp = SkMask::AllocImage(dstSize); + int sw = src.fBounds.width(); + int sh = src.fBounds.height(); + const uint8_t* sp = src.fImage; + uint8_t* dp = SkMask::AllocImage(dstSize); SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp); // build the blurry destination - SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); - uint8_t* tp = tmpBuffer.get(); + SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); + uint8_t* tp = tmpBuffer.get(); int w = sw, h = sh; if (outerWeight == 255) { @@ -555,33 +560,40 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src, if (kHigh_SkBlurQuality == quality) { // Do three X blurs, with a transpose on the final one. w = boxBlur<false>(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h); - w = boxBlur<false>(tp, w, dp, hiRadius, loRadius, w, h); - w = boxBlur<true>(dp, w, tp, hiRadius, hiRadius, w, h); + w = boxBlur<false>(tp, w, dp, hiRadius, loRadius, w, h); + w = boxBlur<true>(dp, w, tp, hiRadius, hiRadius, w, h); // Do three Y blurs, with a transpose on the final one. 
- h = boxBlur<false>(tp, h, dp, loRadius, hiRadius, h, w); - h = boxBlur<false>(dp, h, tp, hiRadius, loRadius, h, w); - h = boxBlur<true>(tp, h, dp, hiRadius, hiRadius, h, w); + h = boxBlur<false>(tp, h, dp, loRadius, hiRadius, h, w); + h = boxBlur<false>(dp, h, tp, hiRadius, loRadius, h, w); + h = boxBlur<true>(tp, h, dp, hiRadius, hiRadius, h, w); } else { w = boxBlur<true>(sp, src.fRowBytes, tp, rx, rx, w, h); - h = boxBlur<true>(tp, h, dp, ry, ry, h, w); + h = boxBlur<true>(tp, h, dp, ry, ry, h, w); } } else { if (kHigh_SkBlurQuality == quality) { // Do three X blurs, with a transpose on the final one. w = boxBlurInterp<false>(sp, src.fRowBytes, tp, rx, w, h, outerWeight); - w = boxBlurInterp<false>(tp, w, dp, rx, w, h, outerWeight); - w = boxBlurInterp<true>(dp, w, tp, rx, w, h, outerWeight); + w = boxBlurInterp<false>(tp, w, dp, rx, w, h, outerWeight); + w = boxBlurInterp<true>(dp, w, tp, rx, w, h, outerWeight); // Do three Y blurs, with a transpose on the final one. - h = boxBlurInterp<false>(tp, h, dp, ry, h, w, outerWeight); - h = boxBlurInterp<false>(dp, h, tp, ry, h, w, outerWeight); - h = boxBlurInterp<true>(tp, h, dp, ry, h, w, outerWeight); + h = boxBlurInterp<false>(tp, h, dp, ry, h, w, outerWeight); + h = boxBlurInterp<false>(dp, h, tp, ry, h, w, outerWeight); + h = boxBlurInterp<true>(tp, h, dp, ry, h, w, outerWeight); } else { w = boxBlurInterp<true>(sp, src.fRowBytes, tp, rx, w, h, outerWeight); - h = boxBlurInterp<true>(tp, h, dp, ry, h, w, outerWeight); + h = boxBlurInterp<true>(tp, h, dp, ry, h, w, outerWeight); } } - dst->fImage = dp; + dst->fImage = autoCall.release(); + } +#else + SkMaskBlurFilter blurFilter{sigma, sigma}; + border = blurFilter.blur(src, dst); +#endif // SK_SUPPORT_LEGACY_MASK_BLUR + + if (src.fImage != nullptr) { // if need be, alloc the "real" dst (same size as src) and copy/merge // the blur into it (applying the src) if (style == kInner_SkBlurStyle) { @@ -590,17 +602,21 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const 
SkMask& src, if (0 == srcSize) { return false; // too big to allocate, abort } + auto blur = dst->fImage; dst->fImage = SkMask::AllocImage(srcSize); + auto blurStart = &blur[border.x() + border.y() * dst->fRowBytes]; merge_src_with_blur(dst->fImage, src.fRowBytes, - sp, src.fRowBytes, - dp + passCount * (rx + ry * dst->fRowBytes), - dst->fRowBytes, sw, sh); - SkMask::FreeImage(dp); + src.fImage, src.fRowBytes, + blurStart, + dst->fRowBytes, + src.fBounds.width(), src.fBounds.height()); + SkMask::FreeImage(blur); } else if (style != kNormal_SkBlurStyle) { - clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes), - dst->fRowBytes, sp, src.fRowBytes, sw, sh, style); + auto dstStart = &dst->fImage[border.x() + border.y() * dst->fRowBytes]; + clamp_with_orig(dstStart, + dst->fRowBytes, src.fImage, src.fRowBytes, + src.fBounds.width(), src.fBounds.height(), style); } - (void)autoCall.release(); } if (style == kInner_SkBlurStyle) { @@ -608,6 +624,10 @@ bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src, dst->fRowBytes = src.fRowBytes; } + if (margin != nullptr) { + *margin = border; + } + return true; } |