-rw-r--r--   bench/BlurRectBench.cpp              2
-rw-r--r--   gm/blurrect.cpp                      2
-rw-r--r--   src/effects/SkBlurMask.cpp         588
-rw-r--r--   src/effects/SkBlurMask.h             9
-rw-r--r--   src/effects/SkBlurMaskFilter.cpp     5
5 files changed, 32 insertions, 574 deletions
diff --git a/bench/BlurRectBench.cpp b/bench/BlurRectBench.cpp
index fec0257e2e..3d0a896283 100644
--- a/bench/BlurRectBench.cpp
+++ b/bench/BlurRectBench.cpp
@@ -140,7 +140,7 @@ protected:
virtual void makeBlurryRect(const SkRect&) SK_OVERRIDE {
SkMask mask;
mask.fImage = NULL;
- SkBlurMask::BlurSeparable(&mask, fSrcMask, this->radius(),
+ SkBlurMask::Blur(&mask, fSrcMask, this->radius(),
SkBlurMask::kNormal_Style,
SkBlurMask::kHigh_Quality);
SkMask::FreeImage(mask.fImage);
diff --git a/gm/blurrect.cpp b/gm/blurrect.cpp
index 056d89ca61..5a18d16fad 100644
--- a/gm/blurrect.cpp
+++ b/gm/blurrect.cpp
@@ -243,7 +243,7 @@ protected:
memset(src.fImage, 0xff, src.computeTotalImageSize());
- return SkBlurMask::BlurSeparable(m, src, this->radius(), this->style(), this->getQuality());
+ return SkBlurMask::Blur(m, src, this->radius(), this->style(), this->getQuality());
}
virtual SkBlurMask::Quality getQuality() {
diff --git a/src/effects/SkBlurMask.cpp b/src/effects/SkBlurMask.cpp
index 8a92eef027..0089bab96e 100644
--- a/src/effects/SkBlurMask.cpp
+++ b/src/effects/SkBlurMask.cpp
@@ -412,482 +412,6 @@ static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius
}
}
-// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
-// breakeven on Mac, and ~15% slowdown on Linux.
-// Reading a word at a time when building the sum buffer seems to give
-// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
-#if defined(SK_BUILD_FOR_WIN32)
-#define UNROLL_KERNEL_LOOP 1
-#endif
-
-/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
- src values at their position, plus all values above and to the left.
- When we sample into this buffer, we need an initial row and column of 0s,
- so we have an index correspondence as follows:
-
- src[i, j] == sum[i+1, j+1]
- sum[0, j] == sum[i, 0] == 0
-
- We assume that the sum buffer's stride == its width
- */
-static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
- const uint8_t src[], int srcRB) {
- int sumW = srcW + 1;
-
- SkASSERT(srcRB >= srcW);
- // mod srcRB so we can apply it after each row
- srcRB -= srcW;
-
- int x, y;
-
- // zero out the top row and column
- memset(sum, 0, sumW * sizeof(sum[0]));
- sum += sumW;
-
- // special case first row
- uint32_t X = 0;
- *sum++ = 0; // initialize the first column to 0
- for (x = srcW - 1; x >= 0; --x) {
- X = *src++ + X;
- *sum++ = X;
- }
- src += srcRB;
-
- // now do the rest of the rows
- for (y = srcH - 1; y > 0; --y) {
- uint32_t L = 0;
- uint32_t C = 0;
- *sum++ = 0; // initialize the first column to 0
-
- for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
- uint32_t T = sum[-sumW];
- X = *src++ + L + T - C;
- *sum++ = X;
- L = X;
- C = T;
- }
-
- for (; x >= 4; x-=4) {
- uint32_t T = sum[-sumW];
- X = *src++ + L + T - C;
- *sum++ = X;
- L = X;
- C = T;
- T = sum[-sumW];
- X = *src++ + L + T - C;
- *sum++ = X;
- L = X;
- C = T;
- T = sum[-sumW];
- X = *src++ + L + T - C;
- *sum++ = X;
- L = X;
- C = T;
- T = sum[-sumW];
- X = *src++ + L + T - C;
- *sum++ = X;
- L = X;
- C = T;
- }
-
- for (; x >= 0; --x) {
- uint32_t T = sum[-sumW];
- X = *src++ + L + T - C;
- *sum++ = X;
- L = X;
- C = T;
- }
- src += srcRB;
- }
-}
-
-/**
- * This is the path for apply_kernel() to be taken when the kernel
- * is wider than the source image.
- */
-static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
- int sw, int sh) {
- SkASSERT(2*rx > sw);
-
- uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
-
- int sumStride = sw + 1;
-
- int dw = sw + 2*rx;
- int dh = sh + 2*ry;
-
- int prev_y = -2*ry;
- int next_y = 1;
-
- for (int y = 0; y < dh; ++y) {
- int py = SkClampPos(prev_y) * sumStride;
- int ny = SkFastMin32(next_y, sh) * sumStride;
-
- int prev_x = -2*rx;
- int next_x = 1;
-
- for (int x = 0; x < dw; ++x) {
- int px = SkClampPos(prev_x);
- int nx = SkFastMin32(next_x, sw);
-
- // TODO: should we be adding 1/2 (1 << 23) to round to the
- // nearest integer here?
- uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
- *dst++ = SkToU8(tmp * scale >> 24);
-
- prev_x += 1;
- next_x += 1;
- }
-
- prev_y += 1;
- next_y += 1;
- }
-}
-/**
- * sw and sh are the width and height of the src. Since the sum buffer
- * matches that, but has an extra row and col at the beginning (with zeros),
- * we can just use sw and sh as our "max" values for pinning coordinates
- * when sampling into sum[][]
- *
- * The inner loop is conceptually simple; we break it into several sections
- * to improve performance. Here's the original version:
- for (int x = 0; x < dw; ++x) {
- int px = SkClampPos(prev_x);
- int nx = SkFastMin32(next_x, sw);
-
- uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
- *dst++ = SkToU8(tmp * scale >> 24);
-
- prev_x += 1;
- next_x += 1;
- }
- * The sections are:
- * left-hand section, where prev_x is clamped to 0
- * center section, where neither prev_x nor next_x is clamped
- * right-hand section, where next_x is clamped to sw
- * On some operating systems, the center section is unrolled for additional
- * speedup.
-*/
-static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
- int sw, int sh) {
- if (2*rx > sw) {
- kernel_clamped(dst, rx, ry, sum, sw, sh);
- return;
- }
-
- uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
-
- int sumStride = sw + 1;
-
- int dw = sw + 2*rx;
- int dh = sh + 2*ry;
-
- int prev_y = -2*ry;
- int next_y = 1;
-
- SkASSERT(2*rx <= dw - 2*rx);
-
- for (int y = 0; y < dh; ++y) {
- int py = SkClampPos(prev_y) * sumStride;
- int ny = SkFastMin32(next_y, sh) * sumStride;
-
- int prev_x = -2*rx;
- int next_x = 1;
- int x = 0;
-
- for (; x < 2*rx; ++x) {
- SkASSERT(prev_x <= 0);
- SkASSERT(next_x <= sw);
-
- int px = 0;
- int nx = next_x;
-
- uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
- *dst++ = SkToU8(tmp * scale >> 24);
-
- prev_x += 1;
- next_x += 1;
- }
-
- int i0 = prev_x + py;
- int i1 = next_x + ny;
- int i2 = next_x + py;
- int i3 = prev_x + ny;
-
-#if UNROLL_KERNEL_LOOP
- for (; x < dw - 2*rx - 4; x += 4) {
- SkASSERT(prev_x >= 0);
- SkASSERT(next_x <= sw);
-
- uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
- *dst++ = SkToU8(tmp * scale >> 24);
- tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
- *dst++ = SkToU8(tmp * scale >> 24);
- tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
- *dst++ = SkToU8(tmp * scale >> 24);
- tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
- *dst++ = SkToU8(tmp * scale >> 24);
-
- prev_x += 4;
- next_x += 4;
- }
-#endif
-
- for (; x < dw - 2*rx; ++x) {
- SkASSERT(prev_x >= 0);
- SkASSERT(next_x <= sw);
-
- uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
- *dst++ = SkToU8(tmp * scale >> 24);
-
- prev_x += 1;
- next_x += 1;
- }
-
- for (; x < dw; ++x) {
- SkASSERT(prev_x >= 0);
- SkASSERT(next_x > sw);
-
- int px = prev_x;
- int nx = sw;
-
- uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
- *dst++ = SkToU8(tmp * scale >> 24);
-
- prev_x += 1;
- next_x += 1;
- }
-
- prev_y += 1;
- next_y += 1;
- }
-}
-
-/**
- * This is the path for apply_kernel_interp() to be taken when the kernel
- * is wider than the source image.
- */
-static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
- const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
- SkASSERT(2*rx > sw);
-
- int innerWeight = 255 - outerWeight;
-
- // round these guys up if they're bigger than 127
- outerWeight += outerWeight >> 7;
- innerWeight += innerWeight >> 7;
-
- uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
- uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));
-
- int sumStride = sw + 1;
-
- int dw = sw + 2*rx;
- int dh = sh + 2*ry;
-
- int prev_y = -2*ry;
- int next_y = 1;
-
- for (int y = 0; y < dh; ++y) {
- int py = SkClampPos(prev_y) * sumStride;
- int ny = SkFastMin32(next_y, sh) * sumStride;
-
- int ipy = SkClampPos(prev_y + 1) * sumStride;
- int iny = SkClampMax(next_y - 1, sh) * sumStride;
-
- int prev_x = -2*rx;
- int next_x = 1;
-
- for (int x = 0; x < dw; ++x) {
- int px = SkClampPos(prev_x);
- int nx = SkFastMin32(next_x, sw);
-
- int ipx = SkClampPos(prev_x + 1);
- int inx = SkClampMax(next_x - 1, sw);
-
- uint32_t outerSum = sum[px+py] + sum[nx+ny]
- - sum[nx+py] - sum[px+ny];
- uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
- - sum[inx+ipy] - sum[ipx+iny];
- *dst++ = SkToU8((outerSum * outerScale
- + innerSum * innerScale) >> 24);
-
- prev_x += 1;
- next_x += 1;
- }
- prev_y += 1;
- next_y += 1;
- }
-}
-
-/**
- * sw and sh are the width and height of the src. Since the sum buffer
- * matches that, but has an extra row and col at the beginning (with zeros),
- * we can just use sw and sh as our "max" values for pinning coordinates
- * when sampling into sum[][]
- *
- * The inner loop is conceptually simple; we break it into several variants
- * to improve performance. Here's the original version:
- for (int x = 0; x < dw; ++x) {
- int px = SkClampPos(prev_x);
- int nx = SkFastMin32(next_x, sw);
-
- int ipx = SkClampPos(prev_x + 1);
- int inx = SkClampMax(next_x - 1, sw);
-
- uint32_t outerSum = sum[px+py] + sum[nx+ny]
- - sum[nx+py] - sum[px+ny];
- uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
- - sum[inx+ipy] - sum[ipx+iny];
- *dst++ = SkToU8((outerSum * outerScale
- + innerSum * innerScale) >> 24);
-
- prev_x += 1;
- next_x += 1;
- }
- * The sections are:
- * left-hand section, where prev_x is clamped to 0
- * center section, where neither prev_x nor next_x is clamped
- * right-hand section, where next_x is clamped to sw
- * On some operating systems, the center section is unrolled for additional
- * speedup.
-*/
-static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
- const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
- SkASSERT(rx > 0 && ry > 0);
- SkASSERT(outerWeight <= 255);
-
- if (2*rx > sw) {
- kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outerWeight);
- return;
- }
-
- int innerWeight = 255 - outerWeight;
-
- // round these guys up if they're bigger than 127
- outerWeight += outerWeight >> 7;
- innerWeight += innerWeight >> 7;
-
- uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
- uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));
-
- int sumStride = sw + 1;
-
- int dw = sw + 2*rx;
- int dh = sh + 2*ry;
-
- int prev_y = -2*ry;
- int next_y = 1;
-
- SkASSERT(2*rx <= dw - 2*rx);
-
- for (int y = 0; y < dh; ++y) {
- int py = SkClampPos(prev_y) * sumStride;
- int ny = SkFastMin32(next_y, sh) * sumStride;
-
- int ipy = SkClampPos(prev_y + 1) * sumStride;
- int iny = SkClampMax(next_y - 1, sh) * sumStride;
-
- int prev_x = -2*rx;
- int next_x = 1;
- int x = 0;
-
- for (; x < 2*rx; ++x) {
- SkASSERT(prev_x < 0);
- SkASSERT(next_x <= sw);
-
- int px = 0;
- int nx = next_x;
-
- int ipx = 0;
- int inx = next_x - 1;
-
- uint32_t outerSum = sum[px+py] + sum[nx+ny]
- - sum[nx+py] - sum[px+ny];
- uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
- - sum[inx+ipy] - sum[ipx+iny];
- *dst++ = SkToU8((outerSum * outerScale
- + innerSum * innerScale) >> 24);
-
- prev_x += 1;
- next_x += 1;
- }
-
- int i0 = prev_x + py;
- int i1 = next_x + ny;
- int i2 = next_x + py;
- int i3 = prev_x + ny;
- int i4 = prev_x + 1 + ipy;
- int i5 = next_x - 1 + iny;
- int i6 = next_x - 1 + ipy;
- int i7 = prev_x + 1 + iny;
-
-#if UNROLL_KERNEL_LOOP
- for (; x < dw - 2*rx - 4; x += 4) {
- SkASSERT(prev_x >= 0);
- SkASSERT(next_x <= sw);
-
- uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
- uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
- *dst++ = SkToU8((outerSum * outerScale
- + innerSum * innerScale) >> 24);
- outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
- innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
- *dst++ = SkToU8((outerSum * outerScale
- + innerSum * innerScale) >> 24);
- outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
- innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
- *dst++ = SkToU8((outerSum * outerScale
- + innerSum * innerScale) >> 24);
- outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
- innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
- *dst++ = SkToU8((outerSum * outerScale
- + innerSum * innerScale) >> 24);
-
- prev_x += 4;
- next_x += 4;
- }
-#endif
-
- for (; x < dw - 2*rx; ++x) {
- SkASSERT(prev_x >= 0);
- SkASSERT(next_x <= sw);
-
- uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
- uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
- *dst++ = SkToU8((outerSum * outerScale
- + innerSum * innerScale) >> 24);
-
- prev_x += 1;
- next_x += 1;
- }
-
- for (; x < dw; ++x) {
- SkASSERT(prev_x >= 0);
- SkASSERT(next_x > sw);
-
- int px = prev_x;
- int nx = sw;
-
- int ipx = prev_x + 1;
- int inx = sw;
-
- uint32_t outerSum = sum[px+py] + sum[nx+ny]
- - sum[nx+py] - sum[px+ny];
- uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
- - sum[inx+ipy] - sum[ipx+iny];
- *dst++ = SkToU8((outerSum * outerScale
- + innerSum * innerScale) >> 24);
-
- prev_x += 1;
- next_x += 1;
- }
-
- prev_y += 1;
- next_y += 1;
- }
-}
-
#include "SkColorPriv.h"
static void merge_src_with_blur(uint8_t dst[], int dstRB,
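[Editorial aside.] The block removed above was the non-separable path: build_sum_buffer() accumulates a summed-area table (with an extra zero row and column, so src[i, j] corresponds to sum[i+1, j+1]), and apply_kernel() / apply_kernel_interp() read four corners of that table to obtain each box sum, clamping the corner coordinates at the image edges. The following is a minimal standalone sketch of that technique, not Skia code: the helper names are hypothetical, it samples at source positions instead of over the enlarged destination, and it omits the interpolated (two-box) variant and the fixed-point scaling.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Build a (w+1) x (h+1) summed-area table. sat[y+1][x+1] is the sum of src
    // over the rectangle [0..x] x [0..y]; row 0 and column 0 stay zero, matching
    // the index correspondence described in the removed comment.
    static std::vector<uint32_t> buildSumBuffer(const uint8_t* src, int w, int h,
                                                int rowBytes) {
        std::vector<uint32_t> sat((w + 1) * (h + 1), 0);
        const int stride = w + 1;
        for (int y = 0; y < h; ++y) {
            for (int x = 0; x < w; ++x) {
                sat[(y + 1) * stride + (x + 1)] = src[y * rowBytes + x]
                                                + sat[y * stride + (x + 1)]   // sum above
                                                + sat[(y + 1) * stride + x]   // sum to the left
                                                - sat[y * stride + x];        // overlap counted twice
            }
        }
        return sat;
    }

    // Box sum over a (2*rx+1) x (2*ry+1) window centered at (cx, cy): four table
    // lookups, with the corners clamped to the source bounds. Dividing by the
    // full window area (as the removed apply_kernel() did) is what fades the
    // result out toward the edges.
    static uint8_t boxAverage(const std::vector<uint32_t>& sat, int w, int h,
                              int cx, int cy, int rx, int ry) {
        const int stride = w + 1;
        const int x0 = std::max(cx - rx, 0),     y0 = std::max(cy - ry, 0);
        const int x1 = std::min(cx + rx + 1, w), y1 = std::min(cy + ry + 1, h);
        const uint32_t sum = sat[y1 * stride + x1] - sat[y0 * stride + x1]
                           - sat[y1 * stride + x0] + sat[y0 * stride + x0];
        return static_cast<uint8_t>(sum / ((2 * rx + 1) * (2 * ry + 1)));
    }

Each output pixel costs four lookups regardless of radius, but as the later hunks show, the high-quality mode had to rebuild the table for each of its three passes over a progressively larger image; that repeated rebuild is the cost the separable path avoids.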
@@ -955,7 +479,7 @@ void SkMask_FreeImage(uint8_t* image) {
bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
SkScalar radius, Style style, Quality quality,
- SkIPoint* margin, bool separable)
+ SkIPoint* margin)
{
if (src.fFormat != SkMask::kA8_Format) {
@@ -1011,77 +535,39 @@ bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
// build the blurry destination
- if (separable) {
- SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
- uint8_t* tp = tmpBuffer.get();
- int w = sw, h = sh;
-
- if (outerWeight == 255) {
- int loRadius, hiRadius;
- get_adjusted_radii(passRadius, &loRadius, &hiRadius);
- if (kHigh_Quality == quality) {
- // Do three X blurs, with a transpose on the final one.
- w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
- w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
- w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
- // Do three Y blurs, with a transpose on the final one.
- h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
- h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
- h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
- } else {
- w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
- h = boxBlur(tp, h, dp, ry, ry, h, w, true);
- }
+ SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
+ uint8_t* tp = tmpBuffer.get();
+ int w = sw, h = sh;
+
+ if (outerWeight == 255) {
+ int loRadius, hiRadius;
+ get_adjusted_radii(passRadius, &loRadius, &hiRadius);
+ if (kHigh_Quality == quality) {
+ // Do three X blurs, with a transpose on the final one.
+ w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
+ w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
+ w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
+ // Do three Y blurs, with a transpose on the final one.
+ h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
+ h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
+ h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
} else {
- if (kHigh_Quality == quality) {
- // Do three X blurs, with a transpose on the final one.
- w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
- w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight);
- w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight);
- // Do three Y blurs, with a transpose on the final one.
- h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight);
- h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight);
- h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
- } else {
- w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
- h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
- }
+ w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
+ h = boxBlur(tp, h, dp, ry, ry, h, w, true);
}
} else {
- const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
- const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
- SkAutoTMalloc<uint32_t> storage(storageW * storageH);
- uint32_t* sumBuffer = storage.get();
-
- //pass1: sp is source, dp is destination
- build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
- if (outerWeight == 255) {
- apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
- } else {
- apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outerWeight);
- }
-
if (kHigh_Quality == quality) {
- //pass2: dp is source, tmpBuffer is destination
- int tmp_sw = sw + 2 * rx;
- int tmp_sh = sh + 2 * ry;
- SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
- build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
- if (outerWeight == 255)
- apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
- else
- apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
- tmp_sw, tmp_sh, outerWeight);
-
- //pass3: tmpBuffer is source, dp is destination
- tmp_sw += 2 * rx;
- tmp_sh += 2 * ry;
- build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
- if (outerWeight == 255)
- apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
- else
- apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
- outerWeight);
+ // Do three X blurs, with a transpose on the final one.
+ w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
+ w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight);
+ w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight);
+ // Do three Y blurs, with a transpose on the final one.
+ h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight);
+ h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight);
+ h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
+ } else {
+ w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
+ h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
}
}
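[Editorial aside.] The retained path above is the separable one: the blur is decomposed into box-filter passes along X and then Y, and in kHigh_Quality mode three passes per axis are run so the overall profile approaches a Gaussian. Writing each final pass transposed lets the same row-oriented routine serve both axes. Below is a minimal sketch of one such pass; it is not Skia's boxBlur() (which zero-pads and grows the mask by the radius). The hypothetical helper clamps at the edges and keeps the output the same size, purely to show the running-sum-plus-transpose idea.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // One horizontal box-blur pass of radius r over a w x h A8 image, written
    // transposed (h x w) so that running the same routine again blurs what were
    // originally the columns.
    static void boxBlurRowsTransposed(const uint8_t* src, int w, int h, int r,
                                      std::vector<uint8_t>* dst) {
        dst->assign(w * h, 0);
        const int window = 2 * r + 1;
        for (int y = 0; y < h; ++y) {
            const uint8_t* row = src + y * w;
            uint32_t sum = 0;
            for (int i = -r; i <= r; ++i) {                       // prime the window at x == 0
                sum += row[std::min(std::max(i, 0), w - 1)];
            }
            for (int x = 0; x < w; ++x) {
                (*dst)[x * h + y] = static_cast<uint8_t>(sum / window);  // transposed store
                sum += row[std::min(x + r + 1, w - 1)];           // sample entering the window
                sum -= row[std::max(x - r, 0)];                   // sample leaving the window
            }
        }
    }

    // Two alternating calls give one X and one Y pass and restore the original
    // orientation; the kHigh_Quality branch above instead runs three passes per
    // axis, transposing only on the last pass of each triple.
    //   std::vector<uint8_t> a, b;
    //   boxBlurRowsTransposed(src, w, h, r, &a);        // blur rows; result is h x w
    //   boxBlurRowsTransposed(a.data(), h, w, r, &b);   // blur original columns; back to w x h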
@@ -1115,20 +601,6 @@ bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
return true;
}
-bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
- SkScalar radius, Style style, Quality quality,
- SkIPoint* margin)
-{
- return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
-}
-
-bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
- SkScalar radius, Style style, Quality quality,
- SkIPoint* margin)
-{
- return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
-}
-
/* Convolving a box with itself three times results in a piecewise
quadratic function:
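[Editorial aside.] The truncated comment ending this hunk refers to the shape produced by repeated box filtering. As a worked form, assuming a unit-width box B(x) = 1 for |x| <= 1/2 and 0 otherwise, two convolutions (three box factors) give the piecewise quadratic

    (B \ast B \ast B)(x) =
        \begin{cases}
            \tfrac{3}{4} - x^2,                       & |x| \le \tfrac{1}{2} \\
            \tfrac{1}{2}\,(\tfrac{3}{2} - |x|)^2,     & \tfrac{1}{2} < |x| \le \tfrac{3}{2} \\
            0,                                        & \text{otherwise}
        \end{cases}

i.e. the quadratic B-spline, which is the Gaussian-like profile the three box passes of the high-quality path produce (rescaled by the actual box width 2*r + 1).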
diff --git a/src/effects/SkBlurMask.h b/src/effects/SkBlurMask.h
index b60c2aadcc..9c625adad8 100644
--- a/src/effects/SkBlurMask.h
+++ b/src/effects/SkBlurMask.h
@@ -36,10 +36,6 @@ public:
static bool Blur(SkMask* dst, const SkMask& src,
SkScalar radius, Style style, Quality quality,
SkIPoint* margin = NULL);
- static bool BlurSeparable(SkMask* dst, const SkMask& src,
- SkScalar radius, Style style, Quality quality,
- SkIPoint* margin = NULL);
-
// the "ground truth" blur does a gaussian convolution; it's slow
// but useful for comparison purposes.
@@ -47,11 +43,6 @@ public:
static bool BlurGroundTruth(SkMask* dst, const SkMask& src,
SkScalar provided_radius, Style style,
SkIPoint* margin = NULL);
-
-private:
- static bool Blur(SkMask* dst, const SkMask& src,
- SkScalar radius, Style style, Quality quality,
- SkIPoint* margin, bool separable);
};
#endif
diff --git a/src/effects/SkBlurMaskFilter.cpp b/src/effects/SkBlurMaskFilter.cpp
index 3d0fb81e48..eaf7704bce 100644
--- a/src/effects/SkBlurMaskFilter.cpp
+++ b/src/effects/SkBlurMaskFilter.cpp
@@ -106,13 +106,8 @@ bool SkBlurMaskFilterImpl::filterMask(SkMask* dst, const SkMask& src,
(fBlurFlags & SkBlurMaskFilter::kHighQuality_BlurFlag) ?
SkBlurMask::kHigh_Quality : SkBlurMask::kLow_Quality;
-#ifndef SK_DISABLE_SEPARABLE_MASK_BLUR
- return SkBlurMask::BlurSeparable(dst, src, radius, (SkBlurMask::Style)fBlurStyle,
- blurQuality, margin);
-#else
return SkBlurMask::Blur(dst, src, radius, (SkBlurMask::Style)fBlurStyle,
blurQuality, margin);
-#endif
}
bool SkBlurMaskFilterImpl::filterRectMask(SkMask* dst, const SkRect& r,
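[Editorial aside.] With BlurSeparable() folded into Blur(), every caller goes through the single entry point shown in the SkBlurMask.h change above. A minimal, hypothetical call site after this patch (the radius, style, and helper name are illustrative, not taken from the patch):

    #include "SkBlurMask.h"
    #include "SkMask.h"

    static bool blurA8Mask(const SkMask& src, SkMask* dst) {
        SkIPoint margin;
        // The separable implementation is now the only one behind Blur(); there
        // is no flag or separate entry point needed to select it.
        bool ok = SkBlurMask::Blur(dst, src, SkIntToScalar(4),
                                   SkBlurMask::kNormal_Style,
                                   SkBlurMask::kHigh_Quality,
                                   &margin);
        if (ok) {
            // ... consume dst->fImage ...
            SkMask::FreeImage(dst->fImage);   // caller owns the blurred image
        }
        return ok;
    }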