diff options
author | Herb Derby <herb@google.com> | 2017-11-16 21:43:26 +0000 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-11-16 22:46:09 +0000 |
commit | 35c556f33e2c9316055537e5683c28ea1645ff49 (patch) | |
tree | 4bb7c7a01a41350254465f132d3ca21573205de6 /src/core/SkMaskBlurFilter.cpp | |
parent | 1e09e461d2ffcf8b07242cfe93dd7d12c4d75866 (diff) |
Revert "Revert "Direct evaluation of gaussian""
This reverts commit a53d999007f92ecd4244b078fa909b76fd0d9f3b.
Reason for revert: Bug in SkNx_sse fixed.
Original change's description:
> Revert "Direct evaluation of gaussian"
>
> This reverts commit 5e18cdea0a0a3f23d8e8b8140c82a4b54e121402.
>
> Reason for revert: ASAN
> Original change's description:
> > Direct evaluation of gaussian
> >
> > The SVG(CSS) standard allows the 3 pass algorithm for sigma >= 2. But
> > sigma < 2, the code must evaluate to the convolution. The old code used
> > an interpolation scheme between windowed filters. This code directly
> > evaluates the gaussian kernel for sigma < 2.
> >
> > This code produces cleaner results, is 25% faster, and does not use a
> > temporary memory buffer.
> >
> > Change-Id: Ibd0caa73cadd06b637f55ba7bd4fefcfe7ac73db
> > Reviewed-on: https://skia-review.googlesource.com/62540
> > Commit-Queue: Herb Derby <herb@google.com>
> > Reviewed-by: Mike Klein <mtklein@google.com>
>
> TBR=mtklein@google.com,herb@google.com
>
> Change-Id: I936077dfa659d71bc361339d98340c55545a1eb8
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Reviewed-on: https://skia-review.googlesource.com/72481
> Reviewed-by: Brian Osman <brianosman@google.com>
> Commit-Queue: Brian Osman <brianosman@google.com>
TBR=mtklein@google.com,herb@google.com,brianosman@google.com
Change-Id: I4c30e3481308a8148d40223519e286885ec6f880
Reviewed-on: https://skia-review.googlesource.com/72900
Commit-Queue: Herb Derby <herb@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'src/core/SkMaskBlurFilter.cpp')
-rw-r--r-- | src/core/SkMaskBlurFilter.cpp | 642 |
1 files changed, 638 insertions, 4 deletions
diff --git a/src/core/SkMaskBlurFilter.cpp b/src/core/SkMaskBlurFilter.cpp index fea98d6e77..9de68fcedc 100644 --- a/src/core/SkMaskBlurFilter.cpp +++ b/src/core/SkMaskBlurFilter.cpp @@ -11,6 +11,7 @@ #include <climits> #include "SkArenaAlloc.h" +#include "SkGaussFilter.h" #include "SkNx.h" #include "SkSafeMath.h" @@ -568,8 +569,644 @@ static SkMask prepare_destination(int radiusX, int radiusY, const SkMask& src) { return dst; } +#if !defined(SK_USE_LEGACY_INTERP_BLUR) +static constexpr uint16_t _____ = 0u; +static constexpr uint16_t kHalf = 0x80u; + +static SK_ALWAYS_INLINE Sk8h load(const uint8_t* from, int width) { + uint8_t buffer[8]; + if (width < 8) { + sk_bzero(buffer, sizeof(buffer)); + for (int i = 0; i < width; i++) { + buffer[i] = from[i]; + } + from = buffer; + } + auto v = SkNx_cast<uint16_t>(Sk8b::Load(from)); + // Convert from 0-255 to 8.8 encoding. + return v << 8; +}; + +static SK_ALWAYS_INLINE void store(uint8_t* to, const Sk8h& v, int width) { + Sk8b b = SkNx_cast<uint8_t>(v >> 8); + if (width == 8) { + b.store(to); + } else { + uint8_t buffer[8]; + b.store(buffer); + for (int i = 0; i < width; i++) { + to[i] = buffer[i]; + } + } +}; + +// In all the blur_x_radius_N and blur_y_radius_N functions the gaussian values are encoded +// in 0.16 format, none of the values is greater than one. The incoming mask values are in 8.8 +// format. The resulting multiply has a 8.24 format, by the mulhi truncates the lower 16 bits +// resulting in a 8.8 format. +// +// The blur_x_radius_N function below blur along a row of pixels using a kernel with radius N. This +// system is setup to minimize the number of multiplies needed. +// +// Explanation: +// Blurring a specific mask value is given by the following equation where D_n is the resulting +// mask value and S_n is the source value. The example below is for a filter with a radius of 1 +// and a width of 3 (radius == (width-1)/2). The indexes for the source and destination are +// aligned. The filter is given by G_n where n is the symmetric filter value. +// +// D[n] = S[n-1]*G[1] + S[n]*G[0] + S[n+1]*G[1]. +// +// We can start the source index at an offset relative to the destination separated by the +// radius. This results in a non-traditional restating of the above filter. +// +// D[n] = S[n]*G[1] + S[n+1]*G[0] + S[n+2]*G[1] +// +// If we look at three specific consecutive destinations the following equations result: +// +// D[5] = S[5]*G[1] + S[6]*G[0] + S[7]*G[1] +// D[7] = S[6]*G[1] + S[7]*G[0] + S[8]*G[1] +// D[8] = S[7]*G[1] + S[8]*G[0] + S[9]*G[1]. +// +// In the above equations, notice that S[7] is used in all three. In particular, two values are +// used: S[7]*G[0] and S[7]*G[1]. So, S[7] is only multiplied twice, but used in D[5], D[6] and +// D[7]. +// +// From the point of view of a source value we end up with the following three equations. +// +// Given S[7]: +// D[5] += S[7]*G[1] +// D[6] += S[7]*G[0] +// D[7] += S[7]*G[1] +// +// In General: +// D[n] += S[n]*G[1] +// D[n+1] += S[n]*G[0] +// D[n+2] += S[n]*G[1] +// +// Now these equations can be ganged using SIMD to form: +// D[n..n+7] += S[n..n+7]*G[1] +// D[n+1..n+8] += S[n..n+7]*G[0] +// D[n+2..n+9] += S[n..n+7]*G[1] +// The next set of values becomes. +// D[n+8..n+15] += S[n+8..n+15]*G[1] +// D[n+9..n+16] += S[n+8..n+15]*G[0] +// D[n+10..n+17] += S[n+8..n+15]*G[1] +// You can see that the D[n+8] and D[n+9] values overlap the two sets, using parts of both +// S[n..7] and S[n+8..n+15]. +// +// Just one more transformation allows the code to maintain all working values in +// registers. I introduce the notation {0, S[n..n+7] * G[k]} to mean that the value where 0 is +// prepended to the array of values to form {0, S[n] * G[k], ..., S[n+7]*G[k]}. +// +// D[n..n+7] += S[n..n+7] * G[1] +// D[n..n+8] += {0, S[n..n+7] * G[0]} +// D[n..n+9] += {0, 0, S[n..n+7] * G[1]} +// +// Now we can encode D[n..n+7] in a single Sk8h register called d0, and D[n+8..n+15] in a +// register d8. In addition, S[0..n+7] becomes s0. +// +// The translation of the {0, S[n..n+7] * G[k]} is translated in the following way below. +// +// Sk8h v0 = s0*G[0] +// Sk8h v1 = s0*G[1] +// /* D[n..n+7] += S[n..n+7] * G[1] */ +// d0 += v1; +// /* D[n..n+8] += {0, S[n..n+7] * G[0]} */ +// d0 += {_____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5], v0[6]} +// d1 += {v0[7], _____, _____, _____, _____, _____, _____, _____} +// /* D[n..n+9] += {0, 0, S[n..n+7] * G[1]} */ +// d0 += {_____, _____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]} +// d1 += {v1[6], v1[7], _____, _____, _____, _____, _____, _____} +// Where we rely on the compiler to generate efficient code for the {____, n, ....} notation. + +static SK_ALWAYS_INLINE void blur_x_radius_1( + const Sk8h& s0, + const Sk8h& g0, const Sk8h& g1, const Sk8h&, const Sk8h&, const Sk8h&, + Sk8h* d0, Sk8h* d8) { + + auto v1 = s0.mulHi(g1); + auto v0 = s0.mulHi(g0); + + // D[n..n+7] += S[n..n+7] * G[1] + *d0 += v1; + + //D[n..n+8] += {0, S[n..n+7] * G[0]} + *d0 += Sk8h{_____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5], v0[6]}; + *d8 += Sk8h{v0[7], _____, _____, _____, _____, _____, _____, _____}; + + // D[n..n+9] += {0, 0, S[n..n+7] * G[1]} + *d0 += Sk8h{_____, _____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]}; + *d8 += Sk8h{v1[6], v1[7], _____, _____, _____, _____, _____, _____}; + +} + +static SK_ALWAYS_INLINE void blur_x_radius_2( + const Sk8h& s0, + const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h&, const Sk8h&, + Sk8h* d0, Sk8h* d8) { + auto v0 = s0.mulHi(g0); + auto v1 = s0.mulHi(g1); + auto v2 = s0.mulHi(g2); + + // D[n..n+7] += S[n..n+7] * G[2] + *d0 += v2; + + // D[n..n+8] += {0, S[n..n+7] * G[1]} + *d0 += Sk8h{_____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5], v1[6]}; + *d8 += Sk8h{v1[7], _____, _____, _____, _____, _____, _____, _____}; + + // D[n..n+9] += {0, 0, S[n..n+7] * G[0]} + *d0 += Sk8h{_____, _____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5]}; + *d8 += Sk8h{v0[6], v0[7], _____, _____, _____, _____, _____, _____}; + + // D[n..n+10] += {0, 0, 0, S[n..n+7] * G[1]} + *d0 += Sk8h{_____, _____, _____, v1[0], v1[1], v1[2], v1[3], v1[4]}; + *d8 += Sk8h{v1[5], v1[6], v1[7], _____, _____, _____, _____, _____}; + + // D[n..n+11] += {0, 0, 0, 0, S[n..n+7] * G[2]} + *d0 += Sk8h{_____, _____, _____, _____, v2[0], v2[1], v2[2], v2[3]}; + *d8 += Sk8h{v2[4], v2[5], v2[6], v2[7], _____, _____, _____, _____}; +} + +static SK_ALWAYS_INLINE void blur_x_radius_3( + const Sk8h& s0, + const Sk8h& gauss0, const Sk8h& gauss1, const Sk8h& gauss2, const Sk8h& gauss3, const Sk8h&, + Sk8h* d0, Sk8h* d8) { + auto v0 = s0.mulHi(gauss0); + auto v1 = s0.mulHi(gauss1); + auto v2 = s0.mulHi(gauss2); + auto v3 = s0.mulHi(gauss3); + + // D[n..n+7] += S[n..n+7] * G[3] + *d0 += v3; + + // D[n..n+8] += {0, S[n..n+7] * G[2]} + *d0 += Sk8h{_____, v2[0], v2[1], v2[2], v2[3], v2[4], v2[5], v2[6]}; + *d8 += Sk8h{v2[7], _____, _____, _____, _____, _____, _____, _____}; + + // D[n..n+9] += {0, 0, S[n..n+7] * G[1]} + *d0 += Sk8h{_____, _____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]}; + *d8 += Sk8h{v1[6], v1[7], _____, _____, _____, _____, _____, _____}; + + // D[n..n+10] += {0, 0, 0, S[n..n+7] * G[0]} + *d0 += Sk8h{_____, _____, _____, v0[0], v0[1], v0[2], v0[3], v0[4]}; + *d8 += Sk8h{v0[5], v0[6], v0[7], _____, _____, _____, _____, _____}; + + // D[n..n+11] += {0, 0, 0, 0, S[n..n+7] * G[1]} + *d0 += Sk8h{_____, _____, _____, _____, v1[0], v1[1], v1[2], v1[3]}; + *d8 += Sk8h{v1[4], v1[5], v1[6], v1[7], _____, _____, _____, _____}; + + // D[n..n+12] += {0, 0, 0, 0, 0, S[n..n+7] * G[2]} + *d0 += Sk8h{_____, _____, _____, _____, _____, v2[0], v2[1], v2[2]}; + *d8 += Sk8h{v2[3], v2[4], v2[5], v2[6], v2[7], _____, _____, _____}; + + // D[n..n+13] += {0, 0, 0, 0, 0, 0, S[n..n+7] * G[3]} + *d0 += Sk8h{_____, _____, _____, _____, _____, _____, v3[0], v3[1]}; + *d8 += Sk8h{v3[2], v3[3], v3[4], v3[5], v3[6], v3[7], _____, _____}; +} + +static SK_ALWAYS_INLINE void blur_x_radius_4( + const Sk8h& s0, + const Sk8h& gauss0, + const Sk8h& gauss1, + const Sk8h& gauss2, + const Sk8h& gauss3, + const Sk8h& gauss4, + Sk8h* d0, Sk8h* d8) { + auto v0 = s0.mulHi(gauss0); + auto v1 = s0.mulHi(gauss1); + auto v2 = s0.mulHi(gauss2); + auto v3 = s0.mulHi(gauss3); + auto v4 = s0.mulHi(gauss4); + + // D[n..n+7] += S[n..n+7] * G[4] + *d0 += v4; + + // D[n..n+8] += {0, S[n..n+7] * G[3]} + *d0 += Sk8h{_____, v3[0], v3[1], v3[2], v3[3], v3[4], v3[5], v3[6]}; + *d8 += Sk8h{v3[7], _____, _____, _____, _____, _____, _____, _____}; + + // D[n..n+9] += {0, 0, S[n..n+7] * G[2]} + *d0 += Sk8h{_____, _____, v2[0], v2[1], v2[2], v2[3], v2[4], v2[5]}; + *d8 += Sk8h{v2[6], v2[7], _____, _____, _____, _____, _____, _____}; + + // D[n..n+10] += {0, 0, 0, S[n..n+7] * G[1]} + *d0 += Sk8h{_____, _____, _____, v1[0], v1[1], v1[2], v1[3], v1[4]}; + *d8 += Sk8h{v1[5], v1[6], v1[7], _____, _____, _____, _____, _____}; + + // D[n..n+11] += {0, 0, 0, 0, S[n..n+7] * G[0]} + *d0 += Sk8h{_____, _____, _____, _____, v0[0], v0[1], v0[2], v0[3]}; + *d8 += Sk8h{v0[4], v0[5], v0[6], v0[7], _____, _____, _____, _____}; + + // D[n..n+12] += {0, 0, 0, 0, 0, S[n..n+7] * G[1]} + *d0 += Sk8h{_____, _____, _____, _____, _____, v1[0], v1[1], v1[2]}; + *d8 += Sk8h{v1[3], v1[4], v1[5], v1[6], v1[7], _____, _____, _____}; + + // D[n..n+13] += {0, 0, 0, 0, 0, 0, S[n..n+7] * G[2]} + *d0 += Sk8h{_____, _____, _____, _____, _____, _____, v2[0], v2[1]}; + *d8 += Sk8h{v2[2], v2[3], v2[4], v2[5], v2[6], v2[7], _____, _____}; + + // D[n..n+14] += {0, 0, 0, 0, 0, 0, 0, S[n..n+7] * G[3]} + *d0 += Sk8h{_____, _____, _____, _____, _____, _____, _____, v3[0]}; + *d8 += Sk8h{v3[1], v3[2], v3[3], v3[4], v3[5], v3[6], v3[7], _____}; + + // D[n..n+15] += {0, 0, 0, 0, 0, 0, 0, 0, S[n..n+7] * G[4]} + *d8 += v4; +} + +using BlurX = decltype(blur_x_radius_1); + +// BlurX will only be one of the functions blur_x_radius_(1|2|3|4). +static SK_ALWAYS_INLINE void blur_row( + BlurX blur, + const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h& g4, + const uint8_t* src, int srcW, + uint8_t* dst, int dstW) { + // Clear the buffer to handle summing wider than source. + Sk8h d0{kHalf}, d8{kHalf}; + + // Go by multiples of 8 in src. + int x = 0; + for (; x <= srcW - 8; x += 8) { + blur(load(src, 8), g0, g1, g2, g3, g4, &d0, &d8); + + store(dst, d0, 8); + + d0 = d8; + d8 = Sk8h{kHalf}; + + src += 8; + dst += 8; + } + + // There are src values left, but the remainder of src values is not a multiple of 8. + int srcTail = srcW - x; + if (srcTail > 0) { + + blur(load(src, srcTail), g0, g1, g2, g3, g4, &d0, &d8); + + int dstTail = std::min(8, dstW - x); + store(dst, d0, dstTail); + + d0 = d8; + dst += dstTail; + x += dstTail; + } + + // There are dst mask values to complete. + int dstTail = dstW - x; + if (dstTail > 0) { + store(dst, d0, dstTail); + } +} + +// BlurX will only be one of the functions blur_x_radius_(1|2|3|4). +static SK_ALWAYS_INLINE void blur_x_rect( + BlurX blur, + uint16_t* gauss, + const uint8_t* src, size_t srcStride, int srcW, + uint8_t* dst, size_t dstStride, int dstW, int dstH) { + + Sk8h g0{gauss[0]}, + g1{gauss[1]}, + g2{gauss[2]}, + g3{gauss[3]}, + g4{gauss[4]}; + + // Blur *ALL* the rows. + for (int y = 0; y < dstH; y++) { + blur_row(blur, g0, g1, g2, g3, g4, src, srcW, dst, dstW); + src += srcStride; + dst += dstStride; + } +} + +SK_ATTRIBUTE(noinline) static void direct_blur_x( + int radius, uint16_t* gauss, + const uint8_t* src, size_t srcStride, int srcW, + uint8_t* dst, size_t dstStride, int dstW, int dstH) { + + switch (radius) { + case 1: + blur_x_rect(blur_x_radius_1, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH); + break; + + case 2: + blur_x_rect(blur_x_radius_2, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH); + break; + + case 3: + blur_x_rect(blur_x_radius_3, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH); + break; + + case 4: + blur_x_rect(blur_x_radius_4, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH); + break; + + default: + SkASSERTF(false, "The radius %d is not handled\n", radius); + } +} + +// The operations of the blur_y_radius_N functions work on a theme similar to the blur_x_radius_N +// functions, but end up being simpler because there is no complicated shift of registers. We +// start with the non-traditional form of the gaussian filter. In the following r is the value +// when added generates the next value in the column. +// +// D[n+0r] = S[n+0r]*G[1] +// + S[n+1r]*G[0] +// + S[n+2r]*G[1] +// +// Expanding out in a way similar to blur_x_radius_N for specific values of n. +// +// D[n+0r] = S[n-2r]*G[1] + S[n-1r]*G[0] + S[n+0r]*G[1] +// D[n+1r] = S[n-1r]*G[1] + S[n+0r]*G[0] + S[n+1r]*G[1] +// D[n+2r] = S[n+0r]*G[1] + S[n+1r]*G[0] + S[n+2r]*G[1] +// +// We can see that S[n+0r] is in all three D[] equations, but is only multiplied twice. Now we +// can look at the calculation form the point of view of a source value. +// +// Given S[n+0r]: +// D[n+0r] += S[n+0r]*G[1]; +// /* D[n+0r] is done and can be stored now. */ +// D[n+1r] += S[n+0r]*G[0]; +// D[n+2r] = S[n+0r]*G[1]; +// +// Remember, by induction, that D[n+0r] == S[n-2r]*G[1] + S[n-1r]*G[0] before adding in +// S[n+0r]*G[1]. So, after the addition D[n+0r] has finished calculation and can be stored. Also, +// notice that D[n+2r] is receiving its first value from S[n+0r]*G[1] and is not added in. Notice +// how values flow in the following two iterations in source. +// +// D[n+0r] += S[n+0r]*G[1] +// D[n+1r] += S[n+0r]*G[0] +// D[n+2r] = S[n+0r]*G[1] +// /* ------- */ +// D[n+1r] += S[n+1r]*G[1] +// D[n+2r] += S[n+1r]*G[0] +// D[n+3r] = S[n+1r]*G[1] +// +// Instead of using memory we can introduce temporaries d01 and d12. The update step changes +// to the following. +// +// answer = d01 + S[n+0r]*G[1] +// d01 = d12 + S[n+0r]*G[0] +// d12 = S[n+0r]*G[1] +// return answer +// +// Finally, this can be ganged into SIMD style. +// answer[0..7] = d01[0..7] + S[n+0r..n+0r+7]*G[1] +// d01[0..7] = d12[0..7] + S[n+0r..n+0r+7]*G[0] +// d12[0..7] = S[n+0r..n+0r+7]*G[1] +// return answer[0..7] +static SK_ALWAYS_INLINE Sk8h blur_y_radius_1( + const Sk8h& s0, + const Sk8h& g0, const Sk8h& g1, const Sk8h&, const Sk8h&, const Sk8h&, + Sk8h* d01, Sk8h* d12, Sk8h*, Sk8h*, Sk8h*, Sk8h*, Sk8h*, Sk8h*) { + auto v0 = s0.mulHi(g0); + auto v1 = s0.mulHi(g1); + + Sk8h answer = *d01 + v1; + *d01 = *d12 + v0; + *d12 = v1 + kHalf; + + return answer; +} + +static SK_ALWAYS_INLINE Sk8h blur_y_radius_2( + const Sk8h& s0, + const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h&, const Sk8h&, + Sk8h* d01, Sk8h* d12, Sk8h* d23, Sk8h* d34, Sk8h*, Sk8h*, Sk8h*, Sk8h*) { + auto v0 = s0.mulHi(g0); + auto v1 = s0.mulHi(g1); + auto v2 = s0.mulHi(g2); + + Sk8h answer = *d01 + v2; + *d01 = *d12 + v1; + *d12 = *d23 + v0; + *d23 = *d34 + v1; + *d34 = v2 + kHalf; + + return answer; +} + +static SK_ALWAYS_INLINE Sk8h blur_y_radius_3( + const Sk8h& s0, + const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h&, + Sk8h* d01, Sk8h* d12, Sk8h* d23, Sk8h* d34, Sk8h* d45, Sk8h* d56, Sk8h*, Sk8h*) { + auto v0 = s0.mulHi(g0); + auto v1 = s0.mulHi(g1); + auto v2 = s0.mulHi(g2); + auto v3 = s0.mulHi(g3); + + Sk8h answer = *d01 + v3; + *d01 = *d12 + v2; + *d12 = *d23 + v1; + *d23 = *d34 + v0; + *d34 = *d45 + v1; + *d45 = *d56 + v2; + *d56 = v3 + kHalf; + + return answer; +} + +static SK_ALWAYS_INLINE Sk8h blur_y_radius_4( + const Sk8h& s0, + const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h& g4, + Sk8h* d01, Sk8h* d12, Sk8h* d23, Sk8h* d34, Sk8h* d45, Sk8h* d56, Sk8h* d67, Sk8h* d78) { + auto v0 = s0.mulHi(g0); + auto v1 = s0.mulHi(g1); + auto v2 = s0.mulHi(g2); + auto v3 = s0.mulHi(g3); + auto v4 = s0.mulHi(g4); + + Sk8h answer = *d01 + v4; + *d01 = *d12 + v3; + *d12 = *d23 + v2; + *d23 = *d34 + v1; + *d34 = *d45 + v0; + *d45 = *d56 + v1; + *d56 = *d67 + v2; + *d67 = *d78 + v3; + *d78 = v4 + kHalf; + + return answer; +} + +using BlurY = decltype(blur_y_radius_1); + +// BlurY will be one of blur_y_radius_(1|2|3|4). +static SK_ALWAYS_INLINE void blur_column( + BlurY blur, int radius, int width, + const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h& g4, + const uint8_t* src, size_t srcStride, int srcH, + uint8_t* dst, size_t dstStride) { + Sk8h d01{kHalf}, d12{kHalf}, d23{kHalf}, d34{kHalf}, + d45{kHalf}, d56{kHalf}, d67{kHalf}, d78{kHalf}; + + auto flush = [&](uint8_t* to, const Sk8h& v0, const Sk8h& v1) { + store(to, v0, width); + to += dstStride; + store(to, v1, width); + return to + dstStride; + }; + + for (int y = 0; y < srcH; y += 1) { + auto s = load(src, width); + auto b = blur(s, + g0, g1, g2, g3, g4, + &d01, &d12, &d23, &d34, &d45, &d56, &d67, &d78); + store(dst, b, width); + src += srcStride; + dst += dstStride; + } + + if (radius >= 1) { + dst = flush(dst, d01, d12); + } + if (radius >= 2) { + dst = flush(dst, d23, d34); + } + if (radius >= 3) { + dst = flush(dst, d45, d56); + } + if (radius >= 4) { + flush(dst, d67, d78); + } +} + +// BlurY will be one of blur_y_radius_(1|2|3|4). +static SK_ALWAYS_INLINE void blur_y_rect( + BlurY blur, int radius, uint16_t *gauss, + const uint8_t *src, size_t srcStride, int srcW, int srcH, + uint8_t *dst, size_t dstStride) { + + Sk8h g0{gauss[0]}, + g1{gauss[1]}, + g2{gauss[2]}, + g3{gauss[3]}, + g4{gauss[4]}; + + int x = 0; + for (; x <= srcW - 8; x += 8) { + blur_column(blur, radius, 8, + g0, g1, g2, g3, g4, + src, srcStride, srcH, + dst, dstStride); + src += 8; + dst += 8; + } + + int xTail = srcW - x; + if (xTail > 0) { + blur_column(blur, radius, xTail, + g0, g1, g2, g3, g4, + src, srcStride, srcH, + dst, dstStride); + } +} + +SK_ATTRIBUTE(noinline) static void direct_blur_y( + int radius, uint16_t* gauss, + const uint8_t* src, size_t srcStride, int srcW, int srcH, + uint8_t* dst, size_t dstStride) { + + switch (radius) { + case 1: + blur_y_rect(blur_y_radius_1, 1, gauss, + src, srcStride, srcW, srcH, + dst, dstStride); + break; + + case 2: + blur_y_rect(blur_y_radius_2, 2, gauss, + src, srcStride, srcW, srcH, + dst, dstStride); + break; + + case 3: + blur_y_rect(blur_y_radius_3, 3, gauss, + src, srcStride, srcW, srcH, + dst, dstStride); + break; + + case 4: + blur_y_rect(blur_y_radius_4, 4, gauss, + src, srcStride, srcW, srcH, + dst, dstStride); + break; + + default: + SkASSERTF(false, "The radius %d is not handled\n", radius); + } +} + +static SkIPoint small_blur(double sigmaX, double sigmaY, const SkMask& src, SkMask* dst) { + SkASSERT(0 <= sigmaX && sigmaX < 2); + SkASSERT(0 <= sigmaY && sigmaY < 2); + + SkGaussFilter filterX{sigmaX, SkGaussFilter::Type::Bessel}, + filterY{sigmaY, SkGaussFilter::Type::Bessel}; + + int radiusX = filterX.radius(), + radiusY = filterY.radius(); + + SkASSERT(radiusX <= 4 && radiusY <= 4); + + auto prepareGauss = [](const SkGaussFilter& filter, uint16_t* factors) { + int i = 0; + for (double d : filter) { + factors[i++] = static_cast<uint16_t>(round(d * (1 << 16))); + } + }; + + uint16_t gaussFactorsX[5], + gaussFactorsY[5]; + + prepareGauss(filterX, gaussFactorsX); + prepareGauss(filterY, gaussFactorsY); + + *dst = prepare_destination(radiusX, radiusY, src); + if (src.fImage == nullptr) { + return {SkTo<int32_t>(radiusX), SkTo<int32_t>(radiusY)}; + } + if (dst->fImage == nullptr) { + dst->fBounds.setEmpty(); + return {0, 0}; + } + + int srcW = src.fBounds.width(), + srcH = src.fBounds.height(); + + int dstW = dst->fBounds.width(), + dstH = dst->fBounds.height(); + + size_t srcStride = src.fRowBytes, + dstStride = dst->fRowBytes; + + //TODO: handle bluring in only one direction. + + // Blur vertically and copy to destination. + direct_blur_y(radiusY, gaussFactorsY, + src.fImage, srcStride, srcW, srcH, + dst->fImage + radiusX, dstStride); + + // Blur horizontally in place. + direct_blur_x(radiusX, gaussFactorsX, + dst->fImage + radiusX, dstStride, srcW, + dst->fImage, dstStride, dstW, dstH); + + return {radiusX, radiusY}; +} +#endif // SK_USE_LEGACY_INTERP_BLUR + SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { + #if !defined(SK_USE_LEGACY_INTERP_BLUR) + if (fSigmaW < 2.0 && fSigmaH < 2.0) { + return small_blur(fSigmaW, fSigmaH, src, dst); + } + #endif + // 1024 is a place holder guess until more analysis can be done. SkSTArenaAlloc<1024> alloc; @@ -623,7 +1260,6 @@ SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { tmpStart, tmpW, tmpStart + tmpW * tmpH); } - // Blur vertically (scan in memory order because of the transposition), // and transpose back to the original orientation. auto scanH = planH->makeBlurScan(&alloc, tmpW, buffer); @@ -665,7 +1301,7 @@ SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { for (size_t x = 0; x < srcW; x++) { auto srcStart = &src.fImage[x]; auto dstStart = &dst->fImage[x]; - scanH->blur(srcStart, src.fRowBytes, srcEnd, + scanH->blur(srcStart, src.fRowBytes, srcEnd, dstStart, dst->fRowBytes, dstEnd); } } else { @@ -678,5 +1314,3 @@ SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)}; } - - |