diff options
-rw-r--r-- | bench/BlurBench.cpp | 5 | ||||
-rw-r--r-- | src/core/SkGaussFilter.cpp | 3 | ||||
-rw-r--r-- | src/core/SkGaussFilter.h | 6 | ||||
-rw-r--r-- | src/core/SkMaskBlurFilter.cpp | 642 |
4 files changed, 7 insertions, 649 deletions
diff --git a/bench/BlurBench.cpp b/bench/BlurBench.cpp index 5b94d66ff6..20afd4a2e5 100644 --- a/bench/BlurBench.cpp +++ b/bench/BlurBench.cpp @@ -15,11 +15,9 @@ #define MINI 0.01f #define SMALL SkIntToScalar(2) -#define REAL 0.5f +#define REAL 1.5f #define BIG SkIntToScalar(10) #define REALBIG 100.5f -// The value that produces a sigma of just over 2. -#define CUTOVER 2.6f static const char* gStyleName[] = { "normal", @@ -113,6 +111,5 @@ DEF_BENCH(return new BlurBench(BIG, kNormal_SkBlurStyle, SkBlurMaskFilter::kHigh DEF_BENCH(return new BlurBench(REALBIG, kNormal_SkBlurStyle, SkBlurMaskFilter::kHighQuality_BlurFlag);) DEF_BENCH(return new BlurBench(REAL, kNormal_SkBlurStyle, SkBlurMaskFilter::kHighQuality_BlurFlag);) -DEF_BENCH(return new BlurBench(CUTOVER, kNormal_SkBlurStyle, SkBlurMaskFilter::kHighQuality_BlurFlag);) DEF_BENCH(return new BlurBench(0, kNormal_SkBlurStyle);) diff --git a/src/core/SkGaussFilter.cpp b/src/core/SkGaussFilter.cpp index c0d612ddec..548ff4398d 100644 --- a/src/core/SkGaussFilter.cpp +++ b/src/core/SkGaussFilter.cpp @@ -143,9 +143,10 @@ SkGaussFilter::SkGaussFilter(double sigma, Type type) { } } -int SkGaussFilter::filterDouble(double values[5]) const { +int SkGaussFilter::filterDouble(double* values) const { for (int i = 0; i < fN; i++) { values[i] = fBasis[i]; } return fN; } + diff --git a/src/core/SkGaussFilter.h b/src/core/SkGaussFilter.h index be00cf2cc0..9af45c875b 100644 --- a/src/core/SkGaussFilter.h +++ b/src/core/SkGaussFilter.h @@ -27,16 +27,10 @@ public: int radius() const { return fN - 1; } int width() const { return 2 * this->radius() + 1; } - // TODO: remove filterDouble and use the ranged-for loop interface. - // Take an array of values where the gaussian factors will be placed. Return the number of // values filled. int filterDouble(double values[5]) const; - // Allow a filter to be used in a C++ ranged-for loop. - const double* begin() const { return &fBasis[0]; } - const double* end() const { return &fBasis[fN]; } - private: double fBasis[5]; int fN; diff --git a/src/core/SkMaskBlurFilter.cpp b/src/core/SkMaskBlurFilter.cpp index 9de68fcedc..fea98d6e77 100644 --- a/src/core/SkMaskBlurFilter.cpp +++ b/src/core/SkMaskBlurFilter.cpp @@ -11,7 +11,6 @@ #include <climits> #include "SkArenaAlloc.h" -#include "SkGaussFilter.h" #include "SkNx.h" #include "SkSafeMath.h" @@ -569,644 +568,8 @@ static SkMask prepare_destination(int radiusX, int radiusY, const SkMask& src) { return dst; } -#if !defined(SK_USE_LEGACY_INTERP_BLUR) -static constexpr uint16_t _____ = 0u; -static constexpr uint16_t kHalf = 0x80u; - -static SK_ALWAYS_INLINE Sk8h load(const uint8_t* from, int width) { - uint8_t buffer[8]; - if (width < 8) { - sk_bzero(buffer, sizeof(buffer)); - for (int i = 0; i < width; i++) { - buffer[i] = from[i]; - } - from = buffer; - } - auto v = SkNx_cast<uint16_t>(Sk8b::Load(from)); - // Convert from 0-255 to 8.8 encoding. - return v << 8; -}; - -static SK_ALWAYS_INLINE void store(uint8_t* to, const Sk8h& v, int width) { - Sk8b b = SkNx_cast<uint8_t>(v >> 8); - if (width == 8) { - b.store(to); - } else { - uint8_t buffer[8]; - b.store(buffer); - for (int i = 0; i < width; i++) { - to[i] = buffer[i]; - } - } -}; - -// In all the blur_x_radius_N and blur_y_radius_N functions the gaussian values are encoded -// in 0.16 format, none of the values is greater than one. The incoming mask values are in 8.8 -// format. The resulting multiply has a 8.24 format, by the mulhi truncates the lower 16 bits -// resulting in a 8.8 format. -// -// The blur_x_radius_N function below blur along a row of pixels using a kernel with radius N. This -// system is setup to minimize the number of multiplies needed. -// -// Explanation: -// Blurring a specific mask value is given by the following equation where D_n is the resulting -// mask value and S_n is the source value. The example below is for a filter with a radius of 1 -// and a width of 3 (radius == (width-1)/2). The indexes for the source and destination are -// aligned. The filter is given by G_n where n is the symmetric filter value. -// -// D[n] = S[n-1]*G[1] + S[n]*G[0] + S[n+1]*G[1]. -// -// We can start the source index at an offset relative to the destination separated by the -// radius. This results in a non-traditional restating of the above filter. -// -// D[n] = S[n]*G[1] + S[n+1]*G[0] + S[n+2]*G[1] -// -// If we look at three specific consecutive destinations the following equations result: -// -// D[5] = S[5]*G[1] + S[6]*G[0] + S[7]*G[1] -// D[7] = S[6]*G[1] + S[7]*G[0] + S[8]*G[1] -// D[8] = S[7]*G[1] + S[8]*G[0] + S[9]*G[1]. -// -// In the above equations, notice that S[7] is used in all three. In particular, two values are -// used: S[7]*G[0] and S[7]*G[1]. So, S[7] is only multiplied twice, but used in D[5], D[6] and -// D[7]. -// -// From the point of view of a source value we end up with the following three equations. -// -// Given S[7]: -// D[5] += S[7]*G[1] -// D[6] += S[7]*G[0] -// D[7] += S[7]*G[1] -// -// In General: -// D[n] += S[n]*G[1] -// D[n+1] += S[n]*G[0] -// D[n+2] += S[n]*G[1] -// -// Now these equations can be ganged using SIMD to form: -// D[n..n+7] += S[n..n+7]*G[1] -// D[n+1..n+8] += S[n..n+7]*G[0] -// D[n+2..n+9] += S[n..n+7]*G[1] -// The next set of values becomes. -// D[n+8..n+15] += S[n+8..n+15]*G[1] -// D[n+9..n+16] += S[n+8..n+15]*G[0] -// D[n+10..n+17] += S[n+8..n+15]*G[1] -// You can see that the D[n+8] and D[n+9] values overlap the two sets, using parts of both -// S[n..7] and S[n+8..n+15]. -// -// Just one more transformation allows the code to maintain all working values in -// registers. I introduce the notation {0, S[n..n+7] * G[k]} to mean that the value where 0 is -// prepended to the array of values to form {0, S[n] * G[k], ..., S[n+7]*G[k]}. -// -// D[n..n+7] += S[n..n+7] * G[1] -// D[n..n+8] += {0, S[n..n+7] * G[0]} -// D[n..n+9] += {0, 0, S[n..n+7] * G[1]} -// -// Now we can encode D[n..n+7] in a single Sk8h register called d0, and D[n+8..n+15] in a -// register d8. In addition, S[0..n+7] becomes s0. -// -// The translation of the {0, S[n..n+7] * G[k]} is translated in the following way below. -// -// Sk8h v0 = s0*G[0] -// Sk8h v1 = s0*G[1] -// /* D[n..n+7] += S[n..n+7] * G[1] */ -// d0 += v1; -// /* D[n..n+8] += {0, S[n..n+7] * G[0]} */ -// d0 += {_____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5], v0[6]} -// d1 += {v0[7], _____, _____, _____, _____, _____, _____, _____} -// /* D[n..n+9] += {0, 0, S[n..n+7] * G[1]} */ -// d0 += {_____, _____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]} -// d1 += {v1[6], v1[7], _____, _____, _____, _____, _____, _____} -// Where we rely on the compiler to generate efficient code for the {____, n, ....} notation. - -static SK_ALWAYS_INLINE void blur_x_radius_1( - const Sk8h& s0, - const Sk8h& g0, const Sk8h& g1, const Sk8h&, const Sk8h&, const Sk8h&, - Sk8h* d0, Sk8h* d8) { - - auto v1 = s0.mulHi(g1); - auto v0 = s0.mulHi(g0); - - // D[n..n+7] += S[n..n+7] * G[1] - *d0 += v1; - - //D[n..n+8] += {0, S[n..n+7] * G[0]} - *d0 += Sk8h{_____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5], v0[6]}; - *d8 += Sk8h{v0[7], _____, _____, _____, _____, _____, _____, _____}; - - // D[n..n+9] += {0, 0, S[n..n+7] * G[1]} - *d0 += Sk8h{_____, _____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]}; - *d8 += Sk8h{v1[6], v1[7], _____, _____, _____, _____, _____, _____}; - -} - -static SK_ALWAYS_INLINE void blur_x_radius_2( - const Sk8h& s0, - const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h&, const Sk8h&, - Sk8h* d0, Sk8h* d8) { - auto v0 = s0.mulHi(g0); - auto v1 = s0.mulHi(g1); - auto v2 = s0.mulHi(g2); - - // D[n..n+7] += S[n..n+7] * G[2] - *d0 += v2; - - // D[n..n+8] += {0, S[n..n+7] * G[1]} - *d0 += Sk8h{_____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5], v1[6]}; - *d8 += Sk8h{v1[7], _____, _____, _____, _____, _____, _____, _____}; - - // D[n..n+9] += {0, 0, S[n..n+7] * G[0]} - *d0 += Sk8h{_____, _____, v0[0], v0[1], v0[2], v0[3], v0[4], v0[5]}; - *d8 += Sk8h{v0[6], v0[7], _____, _____, _____, _____, _____, _____}; - - // D[n..n+10] += {0, 0, 0, S[n..n+7] * G[1]} - *d0 += Sk8h{_____, _____, _____, v1[0], v1[1], v1[2], v1[3], v1[4]}; - *d8 += Sk8h{v1[5], v1[6], v1[7], _____, _____, _____, _____, _____}; - - // D[n..n+11] += {0, 0, 0, 0, S[n..n+7] * G[2]} - *d0 += Sk8h{_____, _____, _____, _____, v2[0], v2[1], v2[2], v2[3]}; - *d8 += Sk8h{v2[4], v2[5], v2[6], v2[7], _____, _____, _____, _____}; -} - -static SK_ALWAYS_INLINE void blur_x_radius_3( - const Sk8h& s0, - const Sk8h& gauss0, const Sk8h& gauss1, const Sk8h& gauss2, const Sk8h& gauss3, const Sk8h&, - Sk8h* d0, Sk8h* d8) { - auto v0 = s0.mulHi(gauss0); - auto v1 = s0.mulHi(gauss1); - auto v2 = s0.mulHi(gauss2); - auto v3 = s0.mulHi(gauss3); - - // D[n..n+7] += S[n..n+7] * G[3] - *d0 += v3; - - // D[n..n+8] += {0, S[n..n+7] * G[2]} - *d0 += Sk8h{_____, v2[0], v2[1], v2[2], v2[3], v2[4], v2[5], v2[6]}; - *d8 += Sk8h{v2[7], _____, _____, _____, _____, _____, _____, _____}; - - // D[n..n+9] += {0, 0, S[n..n+7] * G[1]} - *d0 += Sk8h{_____, _____, v1[0], v1[1], v1[2], v1[3], v1[4], v1[5]}; - *d8 += Sk8h{v1[6], v1[7], _____, _____, _____, _____, _____, _____}; - - // D[n..n+10] += {0, 0, 0, S[n..n+7] * G[0]} - *d0 += Sk8h{_____, _____, _____, v0[0], v0[1], v0[2], v0[3], v0[4]}; - *d8 += Sk8h{v0[5], v0[6], v0[7], _____, _____, _____, _____, _____}; - - // D[n..n+11] += {0, 0, 0, 0, S[n..n+7] * G[1]} - *d0 += Sk8h{_____, _____, _____, _____, v1[0], v1[1], v1[2], v1[3]}; - *d8 += Sk8h{v1[4], v1[5], v1[6], v1[7], _____, _____, _____, _____}; - - // D[n..n+12] += {0, 0, 0, 0, 0, S[n..n+7] * G[2]} - *d0 += Sk8h{_____, _____, _____, _____, _____, v2[0], v2[1], v2[2]}; - *d8 += Sk8h{v2[3], v2[4], v2[5], v2[6], v2[7], _____, _____, _____}; - - // D[n..n+13] += {0, 0, 0, 0, 0, 0, S[n..n+7] * G[3]} - *d0 += Sk8h{_____, _____, _____, _____, _____, _____, v3[0], v3[1]}; - *d8 += Sk8h{v3[2], v3[3], v3[4], v3[5], v3[6], v3[7], _____, _____}; -} - -static SK_ALWAYS_INLINE void blur_x_radius_4( - const Sk8h& s0, - const Sk8h& gauss0, - const Sk8h& gauss1, - const Sk8h& gauss2, - const Sk8h& gauss3, - const Sk8h& gauss4, - Sk8h* d0, Sk8h* d8) { - auto v0 = s0.mulHi(gauss0); - auto v1 = s0.mulHi(gauss1); - auto v2 = s0.mulHi(gauss2); - auto v3 = s0.mulHi(gauss3); - auto v4 = s0.mulHi(gauss4); - - // D[n..n+7] += S[n..n+7] * G[4] - *d0 += v4; - - // D[n..n+8] += {0, S[n..n+7] * G[3]} - *d0 += Sk8h{_____, v3[0], v3[1], v3[2], v3[3], v3[4], v3[5], v3[6]}; - *d8 += Sk8h{v3[7], _____, _____, _____, _____, _____, _____, _____}; - - // D[n..n+9] += {0, 0, S[n..n+7] * G[2]} - *d0 += Sk8h{_____, _____, v2[0], v2[1], v2[2], v2[3], v2[4], v2[5]}; - *d8 += Sk8h{v2[6], v2[7], _____, _____, _____, _____, _____, _____}; - - // D[n..n+10] += {0, 0, 0, S[n..n+7] * G[1]} - *d0 += Sk8h{_____, _____, _____, v1[0], v1[1], v1[2], v1[3], v1[4]}; - *d8 += Sk8h{v1[5], v1[6], v1[7], _____, _____, _____, _____, _____}; - - // D[n..n+11] += {0, 0, 0, 0, S[n..n+7] * G[0]} - *d0 += Sk8h{_____, _____, _____, _____, v0[0], v0[1], v0[2], v0[3]}; - *d8 += Sk8h{v0[4], v0[5], v0[6], v0[7], _____, _____, _____, _____}; - - // D[n..n+12] += {0, 0, 0, 0, 0, S[n..n+7] * G[1]} - *d0 += Sk8h{_____, _____, _____, _____, _____, v1[0], v1[1], v1[2]}; - *d8 += Sk8h{v1[3], v1[4], v1[5], v1[6], v1[7], _____, _____, _____}; - - // D[n..n+13] += {0, 0, 0, 0, 0, 0, S[n..n+7] * G[2]} - *d0 += Sk8h{_____, _____, _____, _____, _____, _____, v2[0], v2[1]}; - *d8 += Sk8h{v2[2], v2[3], v2[4], v2[5], v2[6], v2[7], _____, _____}; - - // D[n..n+14] += {0, 0, 0, 0, 0, 0, 0, S[n..n+7] * G[3]} - *d0 += Sk8h{_____, _____, _____, _____, _____, _____, _____, v3[0]}; - *d8 += Sk8h{v3[1], v3[2], v3[3], v3[4], v3[5], v3[6], v3[7], _____}; - - // D[n..n+15] += {0, 0, 0, 0, 0, 0, 0, 0, S[n..n+7] * G[4]} - *d8 += v4; -} - -using BlurX = decltype(blur_x_radius_1); - -// BlurX will only be one of the functions blur_x_radius_(1|2|3|4). -static SK_ALWAYS_INLINE void blur_row( - BlurX blur, - const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h& g4, - const uint8_t* src, int srcW, - uint8_t* dst, int dstW) { - // Clear the buffer to handle summing wider than source. - Sk8h d0{kHalf}, d8{kHalf}; - - // Go by multiples of 8 in src. - int x = 0; - for (; x <= srcW - 8; x += 8) { - blur(load(src, 8), g0, g1, g2, g3, g4, &d0, &d8); - - store(dst, d0, 8); - - d0 = d8; - d8 = Sk8h{kHalf}; - - src += 8; - dst += 8; - } - - // There are src values left, but the remainder of src values is not a multiple of 8. - int srcTail = srcW - x; - if (srcTail > 0) { - - blur(load(src, srcTail), g0, g1, g2, g3, g4, &d0, &d8); - - int dstTail = std::min(8, dstW - x); - store(dst, d0, dstTail); - - d0 = d8; - dst += dstTail; - x += dstTail; - } - - // There are dst mask values to complete. - int dstTail = dstW - x; - if (dstTail > 0) { - store(dst, d0, dstTail); - } -} - -// BlurX will only be one of the functions blur_x_radius_(1|2|3|4). -static SK_ALWAYS_INLINE void blur_x_rect( - BlurX blur, - uint16_t* gauss, - const uint8_t* src, size_t srcStride, int srcW, - uint8_t* dst, size_t dstStride, int dstW, int dstH) { - - Sk8h g0{gauss[0]}, - g1{gauss[1]}, - g2{gauss[2]}, - g3{gauss[3]}, - g4{gauss[4]}; - - // Blur *ALL* the rows. - for (int y = 0; y < dstH; y++) { - blur_row(blur, g0, g1, g2, g3, g4, src, srcW, dst, dstW); - src += srcStride; - dst += dstStride; - } -} - -SK_ATTRIBUTE(noinline) static void direct_blur_x( - int radius, uint16_t* gauss, - const uint8_t* src, size_t srcStride, int srcW, - uint8_t* dst, size_t dstStride, int dstW, int dstH) { - - switch (radius) { - case 1: - blur_x_rect(blur_x_radius_1, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH); - break; - - case 2: - blur_x_rect(blur_x_radius_2, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH); - break; - - case 3: - blur_x_rect(blur_x_radius_3, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH); - break; - - case 4: - blur_x_rect(blur_x_radius_4, gauss, src, srcStride, srcW, dst, dstStride, dstW, dstH); - break; - - default: - SkASSERTF(false, "The radius %d is not handled\n", radius); - } -} - -// The operations of the blur_y_radius_N functions work on a theme similar to the blur_x_radius_N -// functions, but end up being simpler because there is no complicated shift of registers. We -// start with the non-traditional form of the gaussian filter. In the following r is the value -// when added generates the next value in the column. -// -// D[n+0r] = S[n+0r]*G[1] -// + S[n+1r]*G[0] -// + S[n+2r]*G[1] -// -// Expanding out in a way similar to blur_x_radius_N for specific values of n. -// -// D[n+0r] = S[n-2r]*G[1] + S[n-1r]*G[0] + S[n+0r]*G[1] -// D[n+1r] = S[n-1r]*G[1] + S[n+0r]*G[0] + S[n+1r]*G[1] -// D[n+2r] = S[n+0r]*G[1] + S[n+1r]*G[0] + S[n+2r]*G[1] -// -// We can see that S[n+0r] is in all three D[] equations, but is only multiplied twice. Now we -// can look at the calculation form the point of view of a source value. -// -// Given S[n+0r]: -// D[n+0r] += S[n+0r]*G[1]; -// /* D[n+0r] is done and can be stored now. */ -// D[n+1r] += S[n+0r]*G[0]; -// D[n+2r] = S[n+0r]*G[1]; -// -// Remember, by induction, that D[n+0r] == S[n-2r]*G[1] + S[n-1r]*G[0] before adding in -// S[n+0r]*G[1]. So, after the addition D[n+0r] has finished calculation and can be stored. Also, -// notice that D[n+2r] is receiving its first value from S[n+0r]*G[1] and is not added in. Notice -// how values flow in the following two iterations in source. -// -// D[n+0r] += S[n+0r]*G[1] -// D[n+1r] += S[n+0r]*G[0] -// D[n+2r] = S[n+0r]*G[1] -// /* ------- */ -// D[n+1r] += S[n+1r]*G[1] -// D[n+2r] += S[n+1r]*G[0] -// D[n+3r] = S[n+1r]*G[1] -// -// Instead of using memory we can introduce temporaries d01 and d12. The update step changes -// to the following. -// -// answer = d01 + S[n+0r]*G[1] -// d01 = d12 + S[n+0r]*G[0] -// d12 = S[n+0r]*G[1] -// return answer -// -// Finally, this can be ganged into SIMD style. -// answer[0..7] = d01[0..7] + S[n+0r..n+0r+7]*G[1] -// d01[0..7] = d12[0..7] + S[n+0r..n+0r+7]*G[0] -// d12[0..7] = S[n+0r..n+0r+7]*G[1] -// return answer[0..7] -static SK_ALWAYS_INLINE Sk8h blur_y_radius_1( - const Sk8h& s0, - const Sk8h& g0, const Sk8h& g1, const Sk8h&, const Sk8h&, const Sk8h&, - Sk8h* d01, Sk8h* d12, Sk8h*, Sk8h*, Sk8h*, Sk8h*, Sk8h*, Sk8h*) { - auto v0 = s0.mulHi(g0); - auto v1 = s0.mulHi(g1); - - Sk8h answer = *d01 + v1; - *d01 = *d12 + v0; - *d12 = v1 + kHalf; - - return answer; -} - -static SK_ALWAYS_INLINE Sk8h blur_y_radius_2( - const Sk8h& s0, - const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h&, const Sk8h&, - Sk8h* d01, Sk8h* d12, Sk8h* d23, Sk8h* d34, Sk8h*, Sk8h*, Sk8h*, Sk8h*) { - auto v0 = s0.mulHi(g0); - auto v1 = s0.mulHi(g1); - auto v2 = s0.mulHi(g2); - - Sk8h answer = *d01 + v2; - *d01 = *d12 + v1; - *d12 = *d23 + v0; - *d23 = *d34 + v1; - *d34 = v2 + kHalf; - - return answer; -} - -static SK_ALWAYS_INLINE Sk8h blur_y_radius_3( - const Sk8h& s0, - const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h&, - Sk8h* d01, Sk8h* d12, Sk8h* d23, Sk8h* d34, Sk8h* d45, Sk8h* d56, Sk8h*, Sk8h*) { - auto v0 = s0.mulHi(g0); - auto v1 = s0.mulHi(g1); - auto v2 = s0.mulHi(g2); - auto v3 = s0.mulHi(g3); - - Sk8h answer = *d01 + v3; - *d01 = *d12 + v2; - *d12 = *d23 + v1; - *d23 = *d34 + v0; - *d34 = *d45 + v1; - *d45 = *d56 + v2; - *d56 = v3 + kHalf; - - return answer; -} - -static SK_ALWAYS_INLINE Sk8h blur_y_radius_4( - const Sk8h& s0, - const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h& g4, - Sk8h* d01, Sk8h* d12, Sk8h* d23, Sk8h* d34, Sk8h* d45, Sk8h* d56, Sk8h* d67, Sk8h* d78) { - auto v0 = s0.mulHi(g0); - auto v1 = s0.mulHi(g1); - auto v2 = s0.mulHi(g2); - auto v3 = s0.mulHi(g3); - auto v4 = s0.mulHi(g4); - - Sk8h answer = *d01 + v4; - *d01 = *d12 + v3; - *d12 = *d23 + v2; - *d23 = *d34 + v1; - *d34 = *d45 + v0; - *d45 = *d56 + v1; - *d56 = *d67 + v2; - *d67 = *d78 + v3; - *d78 = v4 + kHalf; - - return answer; -} - -using BlurY = decltype(blur_y_radius_1); - -// BlurY will be one of blur_y_radius_(1|2|3|4). -static SK_ALWAYS_INLINE void blur_column( - BlurY blur, int radius, int width, - const Sk8h& g0, const Sk8h& g1, const Sk8h& g2, const Sk8h& g3, const Sk8h& g4, - const uint8_t* src, size_t srcStride, int srcH, - uint8_t* dst, size_t dstStride) { - Sk8h d01{kHalf}, d12{kHalf}, d23{kHalf}, d34{kHalf}, - d45{kHalf}, d56{kHalf}, d67{kHalf}, d78{kHalf}; - - auto flush = [&](uint8_t* to, const Sk8h& v0, const Sk8h& v1) { - store(to, v0, width); - to += dstStride; - store(to, v1, width); - return to + dstStride; - }; - - for (int y = 0; y < srcH; y += 1) { - auto s = load(src, width); - auto b = blur(s, - g0, g1, g2, g3, g4, - &d01, &d12, &d23, &d34, &d45, &d56, &d67, &d78); - store(dst, b, width); - src += srcStride; - dst += dstStride; - } - - if (radius >= 1) { - dst = flush(dst, d01, d12); - } - if (radius >= 2) { - dst = flush(dst, d23, d34); - } - if (radius >= 3) { - dst = flush(dst, d45, d56); - } - if (radius >= 4) { - flush(dst, d67, d78); - } -} - -// BlurY will be one of blur_y_radius_(1|2|3|4). -static SK_ALWAYS_INLINE void blur_y_rect( - BlurY blur, int radius, uint16_t *gauss, - const uint8_t *src, size_t srcStride, int srcW, int srcH, - uint8_t *dst, size_t dstStride) { - - Sk8h g0{gauss[0]}, - g1{gauss[1]}, - g2{gauss[2]}, - g3{gauss[3]}, - g4{gauss[4]}; - - int x = 0; - for (; x <= srcW - 8; x += 8) { - blur_column(blur, radius, 8, - g0, g1, g2, g3, g4, - src, srcStride, srcH, - dst, dstStride); - src += 8; - dst += 8; - } - - int xTail = srcW - x; - if (xTail > 0) { - blur_column(blur, radius, xTail, - g0, g1, g2, g3, g4, - src, srcStride, srcH, - dst, dstStride); - } -} - -SK_ATTRIBUTE(noinline) static void direct_blur_y( - int radius, uint16_t* gauss, - const uint8_t* src, size_t srcStride, int srcW, int srcH, - uint8_t* dst, size_t dstStride) { - - switch (radius) { - case 1: - blur_y_rect(blur_y_radius_1, 1, gauss, - src, srcStride, srcW, srcH, - dst, dstStride); - break; - - case 2: - blur_y_rect(blur_y_radius_2, 2, gauss, - src, srcStride, srcW, srcH, - dst, dstStride); - break; - - case 3: - blur_y_rect(blur_y_radius_3, 3, gauss, - src, srcStride, srcW, srcH, - dst, dstStride); - break; - - case 4: - blur_y_rect(blur_y_radius_4, 4, gauss, - src, srcStride, srcW, srcH, - dst, dstStride); - break; - - default: - SkASSERTF(false, "The radius %d is not handled\n", radius); - } -} - -static SkIPoint small_blur(double sigmaX, double sigmaY, const SkMask& src, SkMask* dst) { - SkASSERT(0 <= sigmaX && sigmaX < 2); - SkASSERT(0 <= sigmaY && sigmaY < 2); - - SkGaussFilter filterX{sigmaX, SkGaussFilter::Type::Bessel}, - filterY{sigmaY, SkGaussFilter::Type::Bessel}; - - int radiusX = filterX.radius(), - radiusY = filterY.radius(); - - SkASSERT(radiusX <= 4 && radiusY <= 4); - - auto prepareGauss = [](const SkGaussFilter& filter, uint16_t* factors) { - int i = 0; - for (double d : filter) { - factors[i++] = static_cast<uint16_t>(round(d * (1 << 16))); - } - }; - - uint16_t gaussFactorsX[5], - gaussFactorsY[5]; - - prepareGauss(filterX, gaussFactorsX); - prepareGauss(filterY, gaussFactorsY); - - *dst = prepare_destination(radiusX, radiusY, src); - if (src.fImage == nullptr) { - return {SkTo<int32_t>(radiusX), SkTo<int32_t>(radiusY)}; - } - if (dst->fImage == nullptr) { - dst->fBounds.setEmpty(); - return {0, 0}; - } - - int srcW = src.fBounds.width(), - srcH = src.fBounds.height(); - - int dstW = dst->fBounds.width(), - dstH = dst->fBounds.height(); - - size_t srcStride = src.fRowBytes, - dstStride = dst->fRowBytes; - - //TODO: handle bluring in only one direction. - - // Blur vertically and copy to destination. - direct_blur_y(radiusY, gaussFactorsY, - src.fImage, srcStride, srcW, srcH, - dst->fImage + radiusX, dstStride); - - // Blur horizontally in place. - direct_blur_x(radiusX, gaussFactorsX, - dst->fImage + radiusX, dstStride, srcW, - dst->fImage, dstStride, dstW, dstH); - - return {radiusX, radiusY}; -} -#endif // SK_USE_LEGACY_INTERP_BLUR - SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { - #if !defined(SK_USE_LEGACY_INTERP_BLUR) - if (fSigmaW < 2.0 && fSigmaH < 2.0) { - return small_blur(fSigmaW, fSigmaH, src, dst); - } - #endif - // 1024 is a place holder guess until more analysis can be done. SkSTArenaAlloc<1024> alloc; @@ -1260,6 +623,7 @@ SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { tmpStart, tmpW, tmpStart + tmpW * tmpH); } + // Blur vertically (scan in memory order because of the transposition), // and transpose back to the original orientation. auto scanH = planH->makeBlurScan(&alloc, tmpW, buffer); @@ -1301,7 +665,7 @@ SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { for (size_t x = 0; x < srcW; x++) { auto srcStart = &src.fImage[x]; auto dstStart = &dst->fImage[x]; - scanH->blur(srcStart, src.fRowBytes, srcEnd, + scanH->blur(srcStart, src.fRowBytes, srcEnd, dstStart, dst->fRowBytes, dstEnd); } } else { @@ -1314,3 +678,5 @@ SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { return {SkTo<int32_t>(borderW), SkTo<int32_t>(borderH)}; } + + |